HBASE-16464 archive folder grows bigger and bigger due to corrupt snapshot under tmp dir
This commit is contained in:
parent
97b164ac38
commit
3909b7c96f
|
@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.Stoppable;
|
import org.apache.hadoop.hbase.Stoppable;
|
||||||
|
import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
|
||||||
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
||||||
import org.apache.hadoop.hbase.util.FSUtils;
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
|
|
||||||
|
@ -300,7 +301,21 @@ public class SnapshotFileCache implements Stoppable {
|
||||||
FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
|
FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
|
||||||
if (running != null) {
|
if (running != null) {
|
||||||
for (FileStatus run : running) {
|
for (FileStatus run : running) {
|
||||||
|
try {
|
||||||
snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
|
snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
|
||||||
|
} catch (CorruptedSnapshotException e) {
|
||||||
|
// See HBASE-16464
|
||||||
|
if (e.getCause() instanceof FileNotFoundException) {
|
||||||
|
// If the snapshot is not in progress, we will delete it
|
||||||
|
if (!fs.exists(new Path(run.getPath(),
|
||||||
|
SnapshotDescriptionUtils.SNAPSHOT_IN_PROGRESS))) {
|
||||||
|
fs.delete(run.getPath(), true);
|
||||||
|
LOG.warn("delete the " + run.getPath() + " due to exception:", e.getCause());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return snapshotInProgress;
|
return snapshotInProgress;
|
||||||
|
|
|
@ -164,7 +164,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh
|
||||||
try {
|
try {
|
||||||
// If regions move after this meta scan, the region specific snapshot should fail, triggering
|
// If regions move after this meta scan, the region specific snapshot should fail, triggering
|
||||||
// an external exception that gets captured here.
|
// an external exception that gets captured here.
|
||||||
|
SnapshotDescriptionUtils.createInProgressTag(workingDir, fs);
|
||||||
// write down the snapshot info in the working directory
|
// write down the snapshot info in the working directory
|
||||||
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
|
SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
|
||||||
snapshotManifest.addTableDescriptor(this.htd);
|
snapshotManifest.addTableDescriptor(this.htd);
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.snapshot;
|
package org.apache.hadoop.hbase.snapshot;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
|
@ -104,6 +105,9 @@ public final class SnapshotDescriptionUtils {
|
||||||
|
|
||||||
/** Temporary directory under the snapshot directory to store in-progress snapshots */
|
/** Temporary directory under the snapshot directory to store in-progress snapshots */
|
||||||
public static final String SNAPSHOT_TMP_DIR_NAME = ".tmp";
|
public static final String SNAPSHOT_TMP_DIR_NAME = ".tmp";
|
||||||
|
|
||||||
|
/** This tag will be created in in-progress snapshots */
|
||||||
|
public static final String SNAPSHOT_IN_PROGRESS = ".inprogress";
|
||||||
// snapshot operation values
|
// snapshot operation values
|
||||||
/** Default value if no start time is specified */
|
/** Default value if no start time is specified */
|
||||||
public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
|
public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
|
||||||
|
@ -292,6 +296,16 @@ public final class SnapshotDescriptionUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create in-progress tag under .tmp of in-progress snapshot
|
||||||
|
* */
|
||||||
|
public static void createInProgressTag(Path workingDir, FileSystem fs) throws IOException {
|
||||||
|
FsPermission perms = FSUtils.getFilePermissions(fs, fs.getConf(),
|
||||||
|
HConstants.DATA_FILE_UMASK_KEY);
|
||||||
|
Path snapshot_in_progress = new Path(workingDir, SnapshotDescriptionUtils.SNAPSHOT_IN_PROGRESS);
|
||||||
|
FSUtils.create(fs, snapshot_in_progress, perms, true);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read in the {@link org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription} stored for the snapshot in the passed directory
|
* Read in the {@link org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription} stored for the snapshot in the passed directory
|
||||||
* @param fs filesystem where the snapshot was taken
|
* @param fs filesystem where the snapshot was taken
|
||||||
|
|
|
@ -82,6 +82,9 @@ public final class SnapshotManifestV2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void regionClose(final SnapshotRegionManifest.Builder region) throws IOException {
|
public void regionClose(final SnapshotRegionManifest.Builder region) throws IOException {
|
||||||
|
// we should ensure the snapshot dir exists, maybe it has been deleted by master
|
||||||
|
// see HBASE-16464
|
||||||
|
if (fs.exists(snapshotDir)) {
|
||||||
SnapshotRegionManifest manifest = region.build();
|
SnapshotRegionManifest manifest = region.build();
|
||||||
FSDataOutputStream stream = fs.create(getRegionManifestPath(snapshotDir, manifest));
|
FSDataOutputStream stream = fs.create(getRegionManifestPath(snapshotDir, manifest));
|
||||||
try {
|
try {
|
||||||
|
@ -89,6 +92,9 @@ public final class SnapshotManifestV2 {
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
LOG.warn("can't write manifest without parent dir, maybe it has been deleted by master?");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public SnapshotRegionManifest.FamilyFiles.Builder familyOpen(
|
public SnapshotRegionManifest.FamilyFiles.Builder familyOpen(
|
||||||
|
|
|
@ -168,4 +168,24 @@ public class TestSnapshotHFileCleaner {
|
||||||
fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true);
|
fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HBASE-16464
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMissedTmpSnapshot() throws IOException {
|
||||||
|
SnapshotTestingUtils.SnapshotMock
|
||||||
|
snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir);
|
||||||
|
SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2(
|
||||||
|
SNAPSHOT_NAME_STR, TABLE_NAME_STR);
|
||||||
|
builder.addRegionV2();
|
||||||
|
builder.missOneRegionSnapshotFile();
|
||||||
|
|
||||||
|
long period = Long.MAX_VALUE;
|
||||||
|
SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
|
||||||
|
"test-snapshot-file-cache-refresh", new SnapshotFiles());
|
||||||
|
cache.getSnapshotsInProgress();
|
||||||
|
assertFalse(fs.exists(builder.getSnapshotsDir()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -583,6 +583,18 @@ public final class SnapshotTestingUtils {
|
||||||
corruptFile(p);
|
corruptFile(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void missOneRegionSnapshotFile() throws IOException {
|
||||||
|
FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir);
|
||||||
|
for (FileStatus fileStatus : manifestFiles) {
|
||||||
|
String fileName = fileStatus.getPath().getName();
|
||||||
|
if (fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOTINFO_FILE)
|
||||||
|
|| fileName.endsWith(".tabledesc")
|
||||||
|
|| fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
|
||||||
|
fs.delete(fileStatus.getPath(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Corrupt data-manifest file
|
* Corrupt data-manifest file
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue