From d896a51291a3a0d806edba496b4fe5351b18bbf5 Mon Sep 17 00:00:00 2001 From: Matteo Bertozzi Date: Tue, 12 Apr 2016 12:23:28 -0700 Subject: [PATCH] HBASE-15621 Suppress Hbase SnapshotHFile cleaner error messages when a snaphot is going on (Huaxiang Sun) --- .../master/snapshot/SnapshotHFileCleaner.java | 5 +- .../hbase/snapshot/SnapshotManifest.java | 6 ++ .../hbase/snapshot/SnapshotManifestV2.java | 15 +++- .../snapshot/TestSnapshotHFileCleaner.java | 86 ++++++++++++++++++- .../hbase/snapshot/SnapshotTestingUtils.java | 73 ++++++++++++++++ .../hbase/snapshot/TestSnapshotManifest.java | 4 +- 6 files changed, 179 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java index 09b0c94b365..df03d63db2c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; +import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; import org.apache.hadoop.hbase.util.FSUtils; @@ -60,10 +61,12 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { public synchronized Iterable getDeletableFiles(Iterable files) { try { return cache.getUnreferencedFiles(files); + } catch (CorruptedSnapshotException cse) { + LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse); } catch (IOException e) { LOG.error("Exception while checking if files were valid, keeping them just in case.", e); - return Collections.emptyList(); } + return Collections.emptyList(); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java index 0abdf931dc5..004d580b0ed 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifest.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.snapshot; import com.google.protobuf.CodedInputStream; +import com.google.protobuf.InvalidProtocolBufferException; import java.io.FileNotFoundException; import java.io.IOException; @@ -289,6 +290,9 @@ public final class SnapshotManifest { try { v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, fs, workingDir, desc); v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, fs, workingDir, desc); + } catch (InvalidProtocolBufferException e) { + throw new CorruptedSnapshotException("unable to parse region manifest " + + e.getMessage(), e); } finally { tpool.shutdown(); } @@ -442,6 +446,8 @@ public final class SnapshotManifest { return SnapshotDataManifest.parseFrom(cin); } catch (FileNotFoundException e) { return null; + } catch (InvalidProtocolBufferException e) { + throw new CorruptedSnapshotException("unable to parse data manifest " + e.getMessage(), e); } finally { if (in != null) in.close(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java index 2df9bef9f43..3bb3575f3f6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.snapshot; +import com.google.protobuf.InvalidProtocolBufferException; import java.io.IOException; import java.io.InterruptedIOException; import java.util.ArrayList; @@ -58,7 +59,7 @@ public final class SnapshotManifestV2 { public static final int DESCRIPTOR_VERSION = 2; - private static final String SNAPSHOT_MANIFEST_PREFIX = "region-manifest."; + public static final String SNAPSHOT_MANIFEST_PREFIX = "region-manifest."; private SnapshotManifestV2() {} @@ -154,9 +155,15 @@ public final class SnapshotManifestV2 { } catch (InterruptedException e) { throw new InterruptedIOException(e.getMessage()); } catch (ExecutionException e) { - IOException ex = new IOException(); - ex.initCause(e.getCause()); - throw ex; + Throwable t = e.getCause(); + + if(t instanceof InvalidProtocolBufferException) { + throw (InvalidProtocolBufferException)t; + } else { + IOException ex = new IOException("ExecutionException"); + ex.initCause(e.getCause()); + throw ex; + } } return regionsManifest; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java index 65a057dd6a0..616907cf94e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java @@ -20,7 +20,11 @@ package org.apache.hadoop.hbase.master.snapshot; import static org.junit.Assert.assertFalse; import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -28,11 +32,15 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -42,13 +50,25 @@ import org.junit.experimental.categories.Category; @Category(SmallTests.class) public class TestSnapshotHFileCleaner { + private static final Log LOG = LogFactory.getLog(TestSnapshotFileCache.class); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final String TABLE_NAME_STR = "testSnapshotManifest"; + private static final String SNAPSHOT_NAME_STR = "testSnapshotManifest-snapshot"; + private static Path rootDir; + private static FileSystem fs; + + /** + * Setup the test environment + */ + @BeforeClass + public static void setup() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + rootDir = FSUtils.getRootDir(conf); + fs = FileSystem.get(conf); + } @AfterClass public static void cleanup() throws IOException { - Configuration conf = TEST_UTIL.getConfiguration(); - Path rootDir = FSUtils.getRootDir(conf); - FileSystem fs = FileSystem.get(conf); // cleanup fs.delete(rootDir, true); } @@ -86,4 +106,64 @@ public class TestSnapshotHFileCleaner { // make sure that the file isn't deletable assertFalse(cleaner.isFileDeletable(fs.getFileStatus(refFile))); } + + class SnapshotFiles implements SnapshotFileCache.SnapshotFileInspector { + public Collection filesUnderSnapshot(final Path snapshotDir) throws IOException { + Collection files = new HashSet(); + files.addAll(SnapshotReferenceUtil.getHFileNames(TEST_UTIL.getConfiguration(), fs, snapshotDir)); + return files; + } + } + + /** + * If there is a corrupted region manifest, it should throw out CorruptedSnapshotException, + * instead of an IOException + */ + @Test + public void testCorruptedRegionManifest() throws IOException { + SnapshotTestingUtils.SnapshotMock + snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir); + SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2( + SNAPSHOT_NAME_STR, TABLE_NAME_STR); + builder.addRegionV2(); + builder.corruptOneRegionManifest(); + + long period = Long.MAX_VALUE; + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + try { + cache.getSnapshotsInProgress(); + } catch (CorruptedSnapshotException cse) { + LOG.info("Expected exception " + cse); + } finally { + fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true); + } + } + + /** + * If there is a corrupted data manifest, it should throw out CorruptedSnapshotException, + * instead of an IOException + */ + @Test + public void testCorruptedDataManifest() throws IOException { + SnapshotTestingUtils.SnapshotMock + snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir); + SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2( + SNAPSHOT_NAME_STR, TABLE_NAME_STR); + builder.addRegionV2(); + // consolidate to generate a data.manifest file + builder.consolidate(); + builder.corruptDataManifest(); + + long period = Long.MAX_VALUE; + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + try { + cache.getSnapshotsInProgress(); + } catch (CorruptedSnapshotException cse) { + LOG.info("Expected exception " + cse); + } finally { + fs.delete(SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir), true); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java index 9213a3df3c4..22c28e0689c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java @@ -35,7 +35,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -520,6 +523,69 @@ public class SnapshotTestingUtils { return regionData.files; } + private void corruptFile(Path p) throws IOException { + String manifestName = p.getName(); + + // Rename the original region-manifest file + Path newP = new Path(p.getParent(), manifestName + "1"); + fs.rename(p, newP); + + // Create a new region-manifest file + FSDataOutputStream out = fs.create(p); + + //Copy the first 25 bytes of the original region-manifest into the new one, + //make it a corrupted region-manifest file. + FSDataInputStream input = fs.open(newP); + byte[] buffer = new byte[25]; + int len = input.read(0, buffer, 0, 25); + if (len > 1) { + out.write(buffer, 0, len - 1); + } + out.close(); + + // Delete the original region-manifest + fs.delete(newP); + } + + /** + * Corrupt one region-manifest file + * + * @throws IOException on unexecpted error from the FS + */ + public void corruptOneRegionManifest() throws IOException { + FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() { + @Override public boolean accept(Path path) { + return path.getName().startsWith(SnapshotManifestV2.SNAPSHOT_MANIFEST_PREFIX); + } + }); + + if (manifestFiles.length == 0) return; + + // Just choose the first one + Path p = manifestFiles[0].getPath(); + corruptFile(p); + } + + /** + * Corrupt data-manifest file + * + * @throws IOException on unexecpted error from the FS + */ + public void corruptDataManifest() throws IOException { + FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir, new PathFilter() { + @Override + public boolean accept(Path path) { + return path.getName().startsWith(SnapshotManifest.DATA_MANIFEST_NAME); + } + }); + + if (manifestFiles.length == 0) return; + + // Just choose the first one + Path p = manifestFiles[0].getPath(); + corruptFile(p); + } + public Path commit() throws IOException { ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getName()); SnapshotManifest manifest = SnapshotManifest.create(conf, fs, snapshotDir, desc, monitor); @@ -529,6 +595,13 @@ public class SnapshotTestingUtils { snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(desc, rootDir); return snapshotDir; } + + public void consolidate() throws IOException { + ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getName()); + SnapshotManifest manifest = SnapshotManifest.create(conf, fs, snapshotDir, desc, monitor); + manifest.addTableDescriptor(htd); + manifest.consolidate(); + } } public SnapshotMock(final Configuration conf, final FileSystem fs, final Path rootDir) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java index 870bfd96936..aa93100c229 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotManifest.java @@ -120,12 +120,12 @@ public class TestSnapshotManifest { try { SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc); fail("fail to test snapshot manifest because message size is too small."); - } catch (InvalidProtocolBufferException ipbe) { + } catch (CorruptedSnapshotException cse) { try { conf.setInt(SnapshotManifest.SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 128 * 1024 * 1024); SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc); LOG.info("open snapshot manifest succeed."); - } catch (InvalidProtocolBufferException ipbe2) { + } catch (CorruptedSnapshotException cse2) { fail("fail to take snapshot because Manifest proto-message too large."); } }