diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index 09b0c94b365..df03d63db2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; +import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; import org.apache.hadoop.hbase.util.FSUtils; @@ -60,10 +61,12 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { public synchronized Iterable getDeletableFiles(Iterable files) { try { return cache.getUnreferencedFiles(files); + } catch (CorruptedSnapshotException cse) { + LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse); } catch (IOException e) { LOG.error("Exception while checking if files were valid, keeping them just in case.", e); - return Collections.emptyList(); } + return Collections.emptyList(); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java index b32a1b5bfe3..df0c348039d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.snapshot; import com.google.protobuf.CodedInputStream; +import com.google.protobuf.InvalidProtocolBufferException; import java.io.FileNotFoundException; import java.io.IOException; @@ -370,6 +371,9 @@ public final class SnapshotManifest { try { v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc); v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc); + } catch (InvalidProtocolBufferException e) { + throw new CorruptedSnapshotException("unable to parse region manifest " + + e.getMessage(), e); } finally { tpool.shutdown(); } @@ -524,6 +528,8 @@ public final class SnapshotManifest { return SnapshotDataManifest.parseFrom(cin); } catch (FileNotFoundException e) { return null; + } catch (InvalidProtocolBufferException e) { + throw new CorruptedSnapshotException("unable to parse data manifest " + e.getMessage(), e); } finally { if (in != null) in.close(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java index 2df9bef9f43..3bb3575f3f6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.snapshot; +import com.google.protobuf.InvalidProtocolBufferException; import java.io.IOException; import java.io.InterruptedIOException; import java.util.ArrayList; @@ -58,7 +59,7 @@ public final class SnapshotManifestV2 { public static final int DESCRIPTOR_VERSION = 2; - private static final String SNAPSHOT_MANIFEST_PREFIX = "region-manifest."; + public static final String SNAPSHOT_MANIFEST_PREFIX = "region-manifest."; private SnapshotManifestV2() {} @@ -154,9 +155,15 @@ public final class SnapshotManifestV2 { } catch (InterruptedException e) { throw new InterruptedIOException(e.getMessage()); } catch (ExecutionException e) { - IOException ex = new IOException(); - ex.initCause(e.getCause()); - throw ex; + Throwable t = e.getCause(); + + if(t instanceof InvalidProtocolBufferException) { + throw (InvalidProtocolBufferException)t; + } else { + IOException ex = new IOException("ExecutionException"); + ex.initCause(e.getCause()); + throw ex; + } } return regionsManifest; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java index 5e5b0047350..b4355baf6a0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java @@ -20,7 +20,11 @@ package org.apache.hadoop.hbase.master.snapshot; import static org.junit.Assert.assertFalse; import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -28,12 +32,16 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -43,13 +51,26 @@ import org.junit.experimental.categories.Category; @Category({MasterTests.class, SmallTests.class}) public class TestSnapshotHFileCleaner { + private static final Log LOG = LogFactory.getLog(TestSnapshotFileCache.class); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final String TABLE_NAME_STR = "testSnapshotManifest"; + private static final String SNAPSHOT_NAME_STR = "testSnapshotManifest-snapshot"; + private static Path rootDir; + private static FileSystem fs; + + /** + * Setup the test environment + */ + @BeforeClass + public static void setup() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + rootDir = FSUtils.getRootDir(conf); + fs = FileSystem.get(conf); + } + @AfterClass public static void cleanup() throws IOException { - Configuration conf = TEST_UTIL.getConfiguration(); - Path rootDir = FSUtils.getRootDir(conf); - FileSystem fs = FileSystem.get(conf); // cleanup fs.delete(rootDir, true); } @@ -87,4 +108,64 @@ public class TestSnapshotHFileCleaner { // make sure that the file isn't deletable assertFalse(cleaner.isFileDeletable(fs.getFileStatus(refFile))); } + + class SnapshotFiles implements SnapshotFileCache.SnapshotFileInspector { + public Collection filesUnderSnapshot(final Path snapshotDir) throws IOException { + Collection files = new HashSet(); + files.addAll(SnapshotReferenceUtil.getHFileNames(TEST_UTIL.getConfiguration(), fs, snapshotDir)); + return files; + } + } + + /** + * If there is a corrupted region manifest, it should throw out CorruptedSnapshotException, + * instead of an IOException + */ + @Test + public void testCorruptedRegionManifest() throws IOException { + SnapshotTestingUtils.SnapshotMock + snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir); + SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2( + SNAPSHOT_NAME_STR, TABLE_NAME_STR); + builder.addRegionV2(); + builder.corruptOneRegionManifest(); + + long period = Long.MAX_VALUE; + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + try { + cache.getSnapshotsInProgress(); + } catch (CorruptedSnapshotException cse) { + LOG.info("Expected exception " + cse); + } finally { + fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true); + } + } + + /** + * If there is a corrupted data manifest, it should throw out CorruptedSnapshotException, + * instead of an IOException + */ + @Test + public void testCorruptedDataManifest() throws IOException { + SnapshotTestingUtils.SnapshotMock + snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir); + SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2( + SNAPSHOT_NAME_STR, TABLE_NAME_STR); + builder.addRegionV2(); + // consolidate to generate a data.manifest file + builder.consolidate(); + builder.corruptDataManifest(); + + long period = Long.MAX_VALUE; + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + try { + cache.getSnapshotsInProgress(); + } catch (CorruptedSnapshotException cse) { + LOG.info("Expected exception " + cse); + } finally { + fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java index f739233ad10..666eea3a89d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java @@ -35,7 +35,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -528,6 +531,69 @@ public class SnapshotTestingUtils { return regionData.files; } + private void corruptFile(Path p) throws IOException { + String manifestName = p.getName(); + + // Rename the original region-manifest file + Path newP = new Path(p.getParent(), manifestName + "1"); + fs.rename(p, newP); + + // Create a new region-manifest file + FSDataOutputStream out = fs.create(p); + + //Copy the first 25 bytes of the original region-manifest into the new one, + //make it a corrupted region-manifest file. + FSDataInputStream input = fs.open(newP); + byte[] buffer = new byte[25]; + int len = input.read(0, buffer, 0, 25); + if (len > 1) { + out.write(buffer, 0, len - 1); + } + out.close(); + + // Delete the original region-manifest + fs.delete(newP); + } + + /** + * Corrupt one region-manifest file + * + * @throws IOException on unexecpted error from the FS + */ + public void corruptOneRegionManifest() throws IOException { + FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() { + @Override public boolean accept(Path path) { + return path.getName().startsWith(SnapshotManifestV2.SNAPSHOT_MANIFEST_PREFIX); + } + }); + + if (manifestFiles.length == 0) return; + + // Just choose the first one + Path p = manifestFiles[0].getPath(); + corruptFile(p); + } + + /** + * Corrupt data-manifest file + * + * @throws IOException on unexecpted error from the FS + */ + public void corruptDataManifest() throws IOException { + FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(SnapshotManifest.DATA_MANIFEST_NAME); + } + }); + + if (manifestFiles.length == 0) return; + + // Just choose the first one + Path p = manifestFiles[0].getPath(); + corruptFile(p); + } + public Path commit() throws IOException { ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getName()); SnapshotManifest manifest = SnapshotManifest.create(conf, fs, snapshotDir, desc, monitor); @@ -537,6 +603,13 @@ public class SnapshotTestingUtils { snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(desc, rootDir); return snapshotDir; } + + public void consolidate() throws IOException { + ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getName()); + SnapshotManifest manifest = SnapshotManifest.create(conf, fs, snapshotDir, desc, monitor); + manifest.addTableDescriptor(htd); + manifest.consolidate(); + } } public SnapshotMock(final Configuration conf, final FileSystem fs, final Path rootDir) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java index 1205da57663..835f92ea27a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java @@ -122,12 +122,12 @@ public class TestSnapshotManifest { try { SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc); fail("fail to test snapshot manifest because message size is too small."); - } catch (InvalidProtocolBufferException ipbe) { + } catch (CorruptedSnapshotException cse) { try { conf.setInt(SnapshotManifest.SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 128 * 1024 * 1024); SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc); LOG.info("open snapshot manifest succeed."); - } catch (InvalidProtocolBufferException ipbe2) { + } catch (CorruptedSnapshotException cse2) { fail("fail to take snapshot because Manifest proto-message too large."); } }