diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/FailedArchiveException.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/FailedArchiveException.java
new file mode 100644
index 00000000000..4788f93292b
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/FailedArchiveException.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.backup;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * Exception indicating that some files in the requested set could not be archived.
+ */
+@InterfaceAudience.Private
+public class FailedArchiveException extends IOException {
+  private final Collection<Path> failedFiles;
+
+  public FailedArchiveException(String message, Collection<Path> failedFiles) {
+    super(message);
+    this.failedFiles = failedFiles;
+  }
+
+  public Collection<Path> getFailedFiles() {
+    return failedFiles;
+  }
+
+  @Override
+  public String getMessage() {
+    return new StringBuilder(super.getMessage())
+        .append("; files=")
+        .append(failedFiles)
+        .toString();
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
index d682ccce63c..ff45f11f272 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.backup;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -31,7 +32,9 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -50,6 +53,7 @@ import com.google.common.collect.Lists;
  * for a HRegion from the {@link FileSystem}. The hfiles will be archived or deleted, depending on
  * the state of the system.
  */
+@InterfaceAudience.Private
 public class HFileArchiver {
   private static final Log LOG = LogFactory.getLog(HFileArchiver.class);
   private static final String SEPARATOR = ".";
@@ -57,6 +61,14 @@ public class HFileArchiver {
   /** Number of retries in case of fs operation failure */
   private static final int DEFAULT_RETRIES_NUMBER = 3;
 
+  private static final Function<File, Path> FUNC_FILE_TO_PATH =
+      new Function<File, Path>() {
+        @Override
+        public Path apply(File file) {
+          return file == null ? null : file.getPath();
+        }
+      };
+
   private HFileArchiver() {
     // hidden ctor since this is just a util
   }
@@ -132,21 +144,16 @@ public class HFileArchiver {
     // convert the files in the region to a File
     toArchive.addAll(Lists.transform(Arrays.asList(storeDirs), getAsFile));
     LOG.debug("Archiving " + toArchive);
-    boolean success = false;
-    try {
-      success = resolveAndArchive(fs, regionArchiveDir, toArchive);
-    } catch (IOException e) {
-      LOG.error("Failed to archive " + toArchive, e);
-      success = false;
+    List<File> failedArchive = resolveAndArchive(fs, regionArchiveDir, toArchive,
+        EnvironmentEdgeManager.currentTime());
+    if (!failedArchive.isEmpty()) {
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
+          + regionDir.getName() + " into " + regionArchiveDir
+          + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
     }
 
-    // if that was successful, then we delete the region
-    if (success) {
-      return deleteRegionWithoutArchiving(fs, regionDir);
-    }
-
-    throw new IOException("Received error when attempting to archive files (" + toArchive
-        + "), cannot delete region directory. ");
+    return deleteRegionWithoutArchiving(fs, regionDir);
   }
 
   /**
@@ -174,10 +181,13 @@ public class HFileArchiver {
     Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, parent, tableDir, family);
 
     // do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, toArchive)) {
-      throw new IOException("Failed to archive/delete all the files for region:"
+    List<File> failedArchive = resolveAndArchive(fs, storeArchiveDir, toArchive,
+        EnvironmentEdgeManager.currentTime());
+    if (!failedArchive.isEmpty()){
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
           + Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family)
-          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
+          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
     }
   }
 
@@ -192,7 +202,8 @@ public class HFileArchiver {
    * @throws IOException if the files could not be correctly disposed.
    */
   public static void archiveStoreFiles(Configuration conf, FileSystem fs, HRegionInfo regionInfo,
-      Path tableDir, byte[] family, Collection<StoreFile> compactedFiles) throws IOException {
+      Path tableDir, byte[] family, Collection<StoreFile> compactedFiles)
+      throws IOException, FailedArchiveException {
 
     // sometimes in testing, we don't have rss, so we need to check for that
     if (fs == null) {
@@ -228,10 +239,14 @@
     Collection<File> storeFiles = Collections2.transform(compactedFiles, getStorePath);
 
     // do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, storeFiles)) {
-      throw new IOException("Failed to archive/delete all the files for region:"
+    List<File> failedArchive = resolveAndArchive(fs, storeArchiveDir, storeFiles,
+        EnvironmentEdgeManager.currentTime());
+
+    if (!failedArchive.isEmpty()){
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
           + Bytes.toString(regionInfo.getRegionName()) + ", family:" + Bytes.toString(family)
-          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
+          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
     }
   }
 
@@ -264,36 +279,6 @@
     }
   }
 
-  /**
-   * Archive the given files and resolve any conflicts with existing files via appending the time
-   * archiving started (so all conflicts in the same group have the same timestamp appended).
-   * <p>
-   * If any of the passed files to archive are directories, archives all the files under that
-   * directory. Archive directory structure for children is the base archive directory name + the
-   * parent directory and is built recursively is passed files are directories themselves.
-   * @param fs {@link FileSystem} on which to archive the files
-   * @param baseArchiveDir base archive directory to archive the given files
-   * @param toArchive files to be archived
-   * @return true on success, false otherwise
-   * @throws IOException on unexpected failure
-   */
-  private static boolean resolveAndArchive(FileSystem fs, Path baseArchiveDir,
-      Collection<File> toArchive) throws IOException {
-    if (LOG.isTraceEnabled()) LOG.trace("Starting to archive " + toArchive);
-    long start = EnvironmentEdgeManager.currentTime();
-    List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
-
-    // notify that some files were not archived.
-    // We can't delete the files otherwise snapshots or other backup system
-    // that relies on the archiver end up with data loss.
-    if (failures.size() > 0) {
-      LOG.warn("Failed to complete archive of: " + failures +
-        ". Those files are still in the original location, and they may slow down reads.");
-      return false;
-    }
-    return true;
-  }
-
   /**
    * Resolve any conflict with an existing archive file via timestamp-append
    * renaming of the existing file and then archive the passed in files.
@@ -423,6 +408,10 @@
       try {
         success = currentFile.moveAndClose(archiveFile);
+      } catch (FileNotFoundException fnfe) {
+        LOG.warn("Failed to archive " + currentFile +
+            " because it does not exist! Skipping and continuing on.", fnfe);
+        success = true;
       } catch (IOException e) {
         LOG.warn("Failed to archive " + currentFile + " on try #" + i, e);
         success = false;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 2757eae1bca..43845efc67a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -1534,7 +1534,11 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
     } catch (InterruptedException e) {
       throw (InterruptedIOException)new InterruptedIOException().initCause(e);
     } catch (ExecutionException e) {
-      throw new IOException(e.getCause());
+      Throwable cause = e.getCause();
+      if (cause instanceof IOException) {
+        throw (IOException) cause;
+      }
+      throw new IOException(cause);
     } finally {
       storeCloserThreadPool.shutdownNow();
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 7f00b7dd8d8..6f47b0e9c66 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -60,6 +60,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Tag;
 import org.apache.hadoop.hbase.TagType;
+import org.apache.hadoop.hbase.backup.FailedArchiveException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.conf.ConfigurationManager;
@@ -2729,7 +2730,24 @@
           LOG.debug("Moving the files " + filesToRemove + " to archive");
archive"); } // Only if this is successful it has to be removed - this.fs.removeStoreFiles(this.getFamily().getNameAsString(), filesToRemove); + try { + this.fs.removeStoreFiles(this.getFamily().getNameAsString(), filesToRemove); + } catch (FailedArchiveException fae) { + // Even if archiving some files failed, we still need to clear out any of the + // files which were successfully archived. Otherwise we will receive a + // FileNotFoundException when we attempt to re-archive them in the next go around. + Collection failedFiles = fae.getFailedFiles(); + Iterator iter = filesToRemove.iterator(); + while (iter.hasNext()) { + if (failedFiles.contains(iter.next().getPath())) { + iter.remove(); + } + } + if (!filesToRemove.isEmpty()) { + clearCompactedfiles(filesToRemove); + } + throw fae; + } } } if (!filesToRemove.isEmpty()) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java index 70623e9ce38..8c1644e0535 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java @@ -44,6 +44,7 @@ public class MockStoreFile extends StoreFile { boolean isMajor; HDFSBlocksDistribution hdfsBlocksDistribution; long modificationTime; + boolean compactedAway; MockStoreFile(HBaseTestingUtility testUtil, Path testPath, long length, long ageInDisk, boolean isRef, long sequenceid) throws IOException { @@ -120,6 +121,11 @@ public class MockStoreFile extends StoreFile { null : timeRangeTracker.getMax(); } + @Override + public void markCompactedAway() { + this.compactedAway = true; + } + @Override public long getModificationTimeStamp() { return modificationTime; @@ -135,6 +141,7 @@ public class MockStoreFile extends StoreFile { final long len = this.length; final TimeRangeTracker timeRangeTracker = this.timeRangeTracker; final long entries = this.entryCount; + final boolean compactedAway = this.compactedAway; return new StoreFile.Reader() { @Override public long length() { @@ -150,6 +157,16 @@ public class MockStoreFile extends StoreFile { public long getEntries() { return entries; } + + @Override + public boolean isCompactedAway() { + return compactedAway; + } + + @Override + public void close(boolean evictOnClose) throws IOException { + // no-op + } }; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveIOException.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveIOException.java new file mode 100644 index 00000000000..d9471f9b72a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveIOException.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
+import com.google.common.collect.ImmutableList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.FailedArchiveException;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.wal.WALFactory;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Tests that archiving compacted files behaves correctly when encountering exceptions.
+ */
+@Category(MediumTests.class)
+public class TestCompactionArchiveIOException {
+  private static final String ERROR_FILE = "fffffffffffffffffdeadbeef";
+
+  public HBaseTestingUtility testUtil;
+
+  private Path testDir;
+
+  @Before
+  public void setup() throws Exception {
+    testUtil = new HBaseTestingUtility();
+    testUtil.startMiniDFSCluster(1);
+    testDir = testUtil.getDataTestDirOnTestFS();
+    FSUtils.setRootDir(testUtil.getConfiguration(), testDir);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    testUtil.cleanupTestDir();
+    testUtil.shutdownMiniDFSCluster();
+  }
+
+  @Test
+  public void testRemoveCompactedFilesWithException() throws Exception {
+    byte[] fam = Bytes.toBytes("f");
+    byte[] col = Bytes.toBytes("c");
+    byte[] val = Bytes.toBytes("val");
+
+    TableName tableName = TableName.valueOf(getClass().getSimpleName());
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    htd.addFamily(new HColumnDescriptor(fam));
+    HRegionInfo info = new HRegionInfo(tableName, null, null, false);
+    final HRegion region = initHRegion(htd, info);
+    RegionServerServices rss = mock(RegionServerServices.class);
+    List<Region> regions = new ArrayList<Region>();
+    regions.add(region);
+    when(rss.getOnlineRegions()).thenReturn(regions);
+
+    // Create the cleaner object
+    final CompactedHFilesDischarger cleaner =
+        new CompactedHFilesDischarger(1000, (Stoppable) null, rss, false);
+    // Add some data to the region and do some flushes
+    int batchSize = 10;
+    int fileCount = 10;
+    for (int f = 0; f < fileCount; f++) {
+      int start = f * batchSize;
+      for (int i = start; i < start + batchSize; i++) {
+        Put p = new Put(Bytes.toBytes("row" + i));
+        p.addColumn(fam, col, val);
+        region.put(p);
+      }
+      // flush them
+      region.flush(true);
+    }
+
+    HStore store = (HStore) region.getStore(fam);
+    assertEquals(fileCount, store.getStorefilesCount());
+
+    Collection<StoreFile> storefiles = store.getStorefiles();
+    // None of the files should be in compacted state.
+    for (StoreFile file : storefiles) {
+      assertFalse(file.isCompactedAway());
+    }
+
+    StoreFileManager fileManager = store.getStoreEngine().getStoreFileManager();
+    Collection<StoreFile> initialCompactedFiles = fileManager.getCompactedfiles();
+    assertTrue(initialCompactedFiles == null || initialCompactedFiles.isEmpty());
+
+    // Do compaction
+    region.compact(true);
+
+    // all prior store files should now be compacted
+    Collection<StoreFile> compactedFilesPreClean = fileManager.getCompactedfiles();
+    assertNotNull(compactedFilesPreClean);
+    assertTrue(compactedFilesPreClean.size() > 0);
+
+    // add the dummy file to the store directory
+    HRegionFileSystem regionFS = region.getRegionFileSystem();
+    Path errFile = regionFS.getStoreFilePath(Bytes.toString(fam), ERROR_FILE);
+    FSDataOutputStream out = regionFS.getFileSystem().create(errFile);
+    out.writeInt(1);
+    out.close();
+
+    StoreFile errStoreFile = new MockStoreFile(testUtil, errFile, 1, 0, false, 1);
+    fileManager.addCompactionResults(
+        ImmutableList.of(errStoreFile), ImmutableList.<StoreFile>of());
+
+    // cleanup compacted files
+    cleaner.chore();
+
+    // make sure the compacted files are cleared
+    Collection<StoreFile> compactedFilesPostClean = fileManager.getCompactedfiles();
+    assertEquals(1, compactedFilesPostClean.size());
+    for (StoreFile origFile : compactedFilesPreClean) {
+      assertFalse(compactedFilesPostClean.contains(origFile));
+    }
+
+    // close the region
+    try {
+      region.close();
+    } catch (FailedArchiveException e) {
+      // expected due to errorfile
+      assertEquals(1, e.getFailedFiles().size());
+      assertEquals(ERROR_FILE, e.getFailedFiles().iterator().next().getName());
+    }
+  }
+
+  private HRegion initHRegion(HTableDescriptor htd, HRegionInfo info)
+      throws IOException {
+    Configuration conf = testUtil.getConfiguration();
+    Path tableDir = FSUtils.getTableDir(testDir, htd.getTableName());
+    Path regionDir = new Path(tableDir, info.getEncodedName());
+    Path storeDir = new Path(regionDir, htd.getColumnFamilies()[0].getNameAsString());
+
+
+    FileSystem errFS = spy(testUtil.getTestFileSystem());
+    // Prior to HBASE-16964, when an exception is thrown archiving any compacted file,
+    // none of the other files are cleared from the compactedfiles list.
+    // Simulate this condition with a dummy file
+    doThrow(new IOException("Error for test"))
+        .when(errFS).rename(eq(new Path(storeDir, ERROR_FILE)), any(Path.class));
+
+    HRegionFileSystem fs = new HRegionFileSystem(conf, errFS, tableDir, info);
+    final Configuration walConf = new Configuration(conf);
+    FSUtils.setRootDir(walConf, tableDir);
+    final WALFactory wals = new WALFactory(walConf, null, "log_" + info.getEncodedName());
+    HRegion region =
+        new HRegion(fs, wals.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace()),
+            conf, htd, null);
+
+    region.initialize();
+
+    return region;
+  }
+}
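For context, a minimal caller-side sketch (illustrative only, not part of the patch) of how code invoking HFileArchiver might react to the FailedArchiveException introduced above. The archiveStoreFiles signature and the getFailedFiles() accessor match the changes in this diff; the example package, class, and method names (ArchiveRetryExample, archiveWithLogging) are hypothetical.

package org.apache.hadoop.hbase.backup.example;

import java.io.IOException;
import java.util.Collection;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.backup.FailedArchiveException;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.regionserver.StoreFile;

public class ArchiveRetryExample {
  private static final Log LOG = LogFactory.getLog(ArchiveRetryExample.class);

  /**
   * Archives the given compacted files, logging any that could not be moved.
   * Files named in the exception remain in their original location and can be
   * retried on a later pass; everything else has already been archived.
   */
  public static void archiveWithLogging(Configuration conf, FileSystem fs,
      HRegionInfo regionInfo, Path tableDir, byte[] family,
      Collection<StoreFile> compactedFiles) throws IOException {
    try {
      HFileArchiver.archiveStoreFiles(conf, fs, regionInfo, tableDir, family, compactedFiles);
    } catch (FailedArchiveException e) {
      // Partial failure: only the files listed in getFailedFiles() failed to archive.
      for (Path failed : e.getFailedFiles()) {
        LOG.warn("Could not archive " + failed + "; leaving it in place for a later retry");
      }
      throw e;
    }
  }
}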