HBASE-16964 Always clear successfully archived files from compacted files list

Gary Helmling 2016-10-28 18:13:10 -07:00
parent c82fc6e0b2
commit e820e64272
6 changed files with 327 additions and 52 deletions

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/FailedArchiveException.java (new file)

@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.backup;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import java.io.IOException;
import java.util.Collection;
/**
* Exception indicating that some files in the requested set could not be archived.
*/
@InterfaceAudience.Private
public class FailedArchiveException extends IOException {
private final Collection<Path> failedFiles;
public FailedArchiveException(String message, Collection<Path> failedFiles) {
super(message);
this.failedFiles = failedFiles;
}
public Collection<Path> getFailedFiles() {
return failedFiles;
}
@Override
public String getMessage() {
return new StringBuilder(super.getMessage())
.append("; files=")
.append(failedFiles)
.toString();
}
}
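
For orientation, a minimal caller-side sketch (not part of this commit) of what the new exception type buys: a caller can ask getFailedFiles() exactly which files were left behind, instead of inferring it from a boolean or a bare IOException. The archiveStoreFiles call matches the signature this commit changes below; the wrapper class and method here are hypothetical.

import java.io.IOException;
import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.backup.FailedArchiveException;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.regionserver.StoreFile;

public class ArchiveCallerSketch {
  private static final Log LOG = LogFactory.getLog(ArchiveCallerSketch.class);

  // Hypothetical helper: archive compacted files and report any partial failure.
  static void archiveAndReport(Configuration conf, FileSystem fs, HRegionInfo info,
      Path tableDir, byte[] family, Collection<StoreFile> compacted) throws IOException {
    try {
      HFileArchiver.archiveStoreFiles(conf, fs, info, tableDir, family, compacted);
    } catch (FailedArchiveException fae) {
      // Only these files are still in the store directory; the rest were archived.
      for (Path failed : fae.getFailedFiles()) {
        LOG.warn("Archive pending, will be retried on a later pass: " + failed);
      }
      throw fae;
    }
  }
}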

hbase-server/src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java

@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.backup;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -31,7 +32,9 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Bytes;
@@ -50,6 +53,7 @@ import com.google.common.collect.Lists;
* for a HRegion from the {@link FileSystem}. The hfiles will be archived or deleted, depending on
* the state of the system.
*/
+@InterfaceAudience.Private
public class HFileArchiver {
private static final Log LOG = LogFactory.getLog(HFileArchiver.class);
private static final String SEPARATOR = ".";
@@ -57,6 +61,14 @@
/** Number of retries in case of fs operation failure */
private static final int DEFAULT_RETRIES_NUMBER = 3;
+  private static final Function<File, Path> FUNC_FILE_TO_PATH =
+      new Function<File, Path>() {
+        @Override
+        public Path apply(File file) {
+          return file == null ? null : file.getPath();
+        }
+      };
private HFileArchiver() {
// hidden ctor since this is just a util
}
@@ -132,21 +144,16 @@
// convert the files in the region to a File
toArchive.addAll(Lists.transform(Arrays.asList(storeDirs), getAsFile));
LOG.debug("Archiving " + toArchive);
-    boolean success = false;
-    try {
-      success = resolveAndArchive(fs, regionArchiveDir, toArchive);
-    } catch (IOException e) {
-      LOG.error("Failed to archive " + toArchive, e);
-      success = false;
+    List<File> failedArchive = resolveAndArchive(fs, regionArchiveDir, toArchive,
+        EnvironmentEdgeManager.currentTime());
+    if (!failedArchive.isEmpty()) {
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
+          + regionDir.getName() + " into " + regionArchiveDir
+          + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
}
// if that was successful, then we delete the region
-    if (success) {
-      return deleteRegionWithoutArchiving(fs, regionDir);
-    }
-    throw new IOException("Received error when attempting to archive files (" + toArchive
-        + "), cannot delete region directory. ");
+    return deleteRegionWithoutArchiving(fs, regionDir);
}
/**
@@ -174,10 +181,13 @@
Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, parent, tableDir, family);
// do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, toArchive)) {
-      throw new IOException("Failed to archive/delete all the files for region:"
+    List<File> failedArchive = resolveAndArchive(fs, storeArchiveDir, toArchive,
+        EnvironmentEdgeManager.currentTime());
+    if (!failedArchive.isEmpty()){
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
+ Bytes.toString(parent.getRegionName()) + ", family:" + Bytes.toString(family)
-          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
+          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
}
}
@@ -192,7 +202,8 @@
* @throws IOException if the files could not be correctly disposed.
*/
public static void archiveStoreFiles(Configuration conf, FileSystem fs, HRegionInfo regionInfo,
-      Path tableDir, byte[] family, Collection<StoreFile> compactedFiles) throws IOException {
+      Path tableDir, byte[] family, Collection<StoreFile> compactedFiles)
+      throws IOException, FailedArchiveException {
// sometimes in testing, we don't have rss, so we need to check for that
if (fs == null) {
@@ -228,10 +239,14 @@
Collection<File> storeFiles = Collections2.transform(compactedFiles, getStorePath);
// do the actual archive
-    if (!resolveAndArchive(fs, storeArchiveDir, storeFiles)) {
-      throw new IOException("Failed to archive/delete all the files for region:"
+    List<File> failedArchive = resolveAndArchive(fs, storeArchiveDir, storeFiles,
+        EnvironmentEdgeManager.currentTime());
+    if (!failedArchive.isEmpty()){
+      throw new FailedArchiveException("Failed to archive/delete all the files for region:"
+ Bytes.toString(regionInfo.getRegionName()) + ", family:" + Bytes.toString(family)
-          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.");
+          + " into " + storeArchiveDir + ". Something is probably awry on the filesystem.",
+          Collections2.transform(failedArchive, FUNC_FILE_TO_PATH));
}
}
@@ -264,36 +279,6 @@
}
}
-  /**
-   * Archive the given files and resolve any conflicts with existing files via appending the time
-   * archiving started (so all conflicts in the same group have the same timestamp appended).
-   * <p>
-   * If any of the passed files to archive are directories, archives all the files under that
-   * directory. Archive directory structure for children is the base archive directory name + the
-   * parent directory and is built recursively is passed files are directories themselves.
-   * @param fs {@link FileSystem} on which to archive the files
-   * @param baseArchiveDir base archive directory to archive the given files
-   * @param toArchive files to be archived
-   * @return <tt>true</tt> on success, <tt>false</tt> otherwise
-   * @throws IOException on unexpected failure
-   */
-  private static boolean resolveAndArchive(FileSystem fs, Path baseArchiveDir,
-      Collection<File> toArchive) throws IOException {
-    if (LOG.isTraceEnabled()) LOG.trace("Starting to archive " + toArchive);
-    long start = EnvironmentEdgeManager.currentTime();
-    List<File> failures = resolveAndArchive(fs, baseArchiveDir, toArchive, start);
-    // notify that some files were not archived.
-    // We can't delete the files otherwise snapshots or other backup system
-    // that relies on the archiver end up with data loss.
-    if (failures.size() > 0) {
-      LOG.warn("Failed to complete archive of: " + failures +
-        ". Those files are still in the original location, and they may slow down reads.");
-      return false;
-    }
-    return true;
-  }
/**
* Resolve any conflict with an existing archive file via timestamp-append
* renaming of the existing file and then archive the passed in files.
@@ -423,6 +408,10 @@
try {
success = currentFile.moveAndClose(archiveFile);
+      } catch (FileNotFoundException fnfe) {
+        LOG.warn("Failed to archive " + currentFile +
+            " because it does not exist! Skipping and continuing on.", fnfe);
+        success = true;
} catch (IOException e) {
LOG.warn("Failed to archive " + currentFile + " on try #" + i, e);
success = false;
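
Two behavioral changes land in this file: callers of resolveAndArchive now receive the list of files that failed instead of a boolean, and a FileNotFoundException during a move is counted as success, since a missing source means an earlier pass already archived the file. A simplified, self-contained sketch of that pattern, assuming a plain rename per file (the real HFileArchiver also retries and resolves name conflicts):

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ArchiveSketch {
  // Move each file into archiveDir; return the files that still need attention.
  static List<Path> moveAll(FileSystem fs, Path archiveDir, Collection<Path> files)
      throws IOException {
    List<Path> failures = new ArrayList<>();
    for (Path file : files) {
      try {
        if (!fs.rename(file, new Path(archiveDir, file.getName()))) {
          failures.add(file); // rename refused; leave it for the caller to handle
        }
      } catch (FileNotFoundException fnfe) {
        // Source vanished: someone archived it already, so count it as a success.
      } catch (IOException ioe) {
        failures.add(file); // transient error; leave the file for a later pass
      }
    }
    return failures;
  }
}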

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java

@@ -1579,7 +1579,11 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
} catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException().initCause(e);
} catch (ExecutionException e) {
-      throw new IOException(e.getCause());
+      Throwable cause = e.getCause();
+      if (cause instanceof IOException) {
+        throw (IOException) cause;
+      }
+      throw new IOException(cause);
} finally {
storeCloserThreadPool.shutdownNow();
}
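
The HRegion change matters for the new exception type: stores are closed via a thread pool, so a FailedArchiveException thrown inside a close task surfaces wrapped in an ExecutionException. Rethrowing the cause when it is already an IOException preserves the concrete subtype for callers. A generic sketch of the same idiom, with hypothetical names:

import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class UnwrapSketch {
  static <T> T runAndUnwrap(Callable<T> task) throws IOException, InterruptedException {
    ExecutorService pool = Executors.newSingleThreadExecutor();
    try {
      Future<T> future = pool.submit(task);
      return future.get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof IOException) {
        throw (IOException) cause; // preserve subtypes such as FailedArchiveException
      }
      throw new IOException(cause);
    } finally {
      pool.shutdownNow();
    }
  }
}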

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java

@@ -33,6 +33,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.NavigableSet;
import java.util.Set;
@@ -60,6 +61,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.FailedArchiveException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.conf.ConfigurationManager;
@@ -2443,7 +2445,24 @@ public class HStore implements Store {
LOG.debug("Moving the files " + filesToRemove + " to archive");
}
// Only if this is successful it has to be removed
-      this.fs.removeStoreFiles(this.getFamily().getNameAsString(), filesToRemove);
+      try {
+        this.fs.removeStoreFiles(this.getFamily().getNameAsString(), filesToRemove);
+      } catch (FailedArchiveException fae) {
+        // Even if archiving some files failed, we still need to clear out any of the
+        // files which were successfully archived. Otherwise we will receive a
+        // FileNotFoundException when we attempt to re-archive them in the next go around.
+        Collection<Path> failedFiles = fae.getFailedFiles();
+        Iterator<StoreFile> iter = filesToRemove.iterator();
+        while (iter.hasNext()) {
+          if (failedFiles.contains(iter.next().getPath())) {
+            iter.remove();
+          }
+        }
+        if (!filesToRemove.isEmpty()) {
+          clearCompactedfiles(filesToRemove);
+        }
+        throw fae;
+      }
}
}
if (!filesToRemove.isEmpty()) {
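
This is the heart of HBASE-16964: on a partial archive failure, the files named by the exception are pruned from filesToRemove, so only the successfully archived ones are cleared from the compacted-files bookkeeping before the exception is rethrown. A standalone sketch of that pruning step, with generic placeholder types (clearCompactedfiles and the HStore fields are not reproduced):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

public class ClearArchivedSketch {
  // Return the attempted entries that are NOT in the failed set; those are the
  // ones that really were archived and are safe to drop from tracking.
  static <F> List<F> retainSuccessful(List<F> attempted, Set<F> failed) {
    List<F> archived = new ArrayList<>(attempted);
    Iterator<F> iter = archived.iterator();
    while (iter.hasNext()) {
      if (failed.contains(iter.next())) {
        iter.remove(); // still on disk; keep it in the compacted-files list
      }
    }
    return archived;
  }
}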

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MockStoreFile.java

@@ -43,6 +43,7 @@ public class MockStoreFile extends StoreFile {
boolean isMajor;
HDFSBlocksDistribution hdfsBlocksDistribution;
long modificationTime;
+  boolean compactedAway;
MockStoreFile(HBaseTestingUtility testUtil, Path testPath,
long length, long ageInDisk, boolean isRef, long sequenceid) throws IOException {
@@ -119,6 +120,11 @@
null : timeRangeTracker.getMax();
}
+  @Override
+  public void markCompactedAway() {
+    this.compactedAway = true;
+  }
@Override
public long getModificationTimeStamp() {
return modificationTime;
@@ -134,6 +140,7 @@
final long len = this.length;
final TimeRangeTracker timeRangeTracker = this.timeRangeTracker;
final long entries = this.entryCount;
+    final boolean compactedAway = this.compactedAway;
return new StoreFileReader() {
@Override
public long length() {
@@ -149,6 +156,16 @@
public long getEntries() {
return entries;
}
+      @Override
+      public boolean isCompactedAway() {
+        return compactedAway;
+      }
+
+      @Override
+      public void close(boolean evictOnClose) throws IOException {
+        // no-op
+      }
};
}
}

hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionArchiveIOException.java (new file)

@@ -0,0 +1,196 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.FailedArchiveException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Tests that archiving compacted files behaves correctly when encountering exceptions.
*/
@Category(MediumTests.class)
public class TestCompactionArchiveIOException {
private static final String ERROR_FILE = "fffffffffffffffffdeadbeef";
public HBaseTestingUtility testUtil;
private Path testDir;
@Before
public void setup() throws Exception {
testUtil = new HBaseTestingUtility();
testUtil.startMiniDFSCluster(1);
testDir = testUtil.getDataTestDirOnTestFS();
FSUtils.setRootDir(testUtil.getConfiguration(), testDir);
}
@After
public void tearDown() throws Exception {
testUtil.cleanupTestDir();
testUtil.shutdownMiniDFSCluster();
}
@Test
public void testRemoveCompactedFilesWithException() throws Exception {
byte[] fam = Bytes.toBytes("f");
byte[] col = Bytes.toBytes("c");
byte[] val = Bytes.toBytes("val");
TableName tableName = TableName.valueOf(getClass().getSimpleName());
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor(fam));
HRegionInfo info = new HRegionInfo(tableName, null, null, false);
final HRegion region = initHRegion(htd, info);
RegionServerServices rss = mock(RegionServerServices.class);
List<Region> regions = new ArrayList<Region>();
regions.add(region);
when(rss.getOnlineRegions()).thenReturn(regions);
// Create the cleaner object
final CompactedHFilesDischarger cleaner =
new CompactedHFilesDischarger(1000, (Stoppable) null, rss, false);
// Add some data to the region and do some flushes
int batchSize = 10;
int fileCount = 10;
for (int f = 0; f < fileCount; f++) {
int start = f * batchSize;
for (int i = start; i < start + batchSize; i++) {
Put p = new Put(Bytes.toBytes("row" + i));
p.addColumn(fam, col, val);
region.put(p);
}
// flush them
region.flush(true);
}
HStore store = (HStore) region.getStore(fam);
assertEquals(fileCount, store.getStorefilesCount());
Collection<StoreFile> storefiles = store.getStorefiles();
// None of the files should be in compacted state.
for (StoreFile file : storefiles) {
assertFalse(file.isCompactedAway());
}
StoreFileManager fileManager = store.getStoreEngine().getStoreFileManager();
Collection<StoreFile> initialCompactedFiles = fileManager.getCompactedfiles();
assertTrue(initialCompactedFiles == null || initialCompactedFiles.isEmpty());
// Do compaction
region.compact(true);
// all prior store files should now be compacted
Collection<StoreFile> compactedFilesPreClean = fileManager.getCompactedfiles();
assertNotNull(compactedFilesPreClean);
assertTrue(compactedFilesPreClean.size() > 0);
// add the dummy file to the store directory
HRegionFileSystem regionFS = region.getRegionFileSystem();
Path errFile = regionFS.getStoreFilePath(Bytes.toString(fam), ERROR_FILE);
FSDataOutputStream out = regionFS.getFileSystem().create(errFile);
out.writeInt(1);
out.close();
StoreFile errStoreFile = new MockStoreFile(testUtil, errFile, 1, 0, false, 1);
fileManager.addCompactionResults(
ImmutableList.of(errStoreFile), ImmutableList.<StoreFile>of());
// cleanup compacted files
cleaner.chore();
// make sure the compacted files are cleared
Collection<StoreFile> compactedFilesPostClean = fileManager.getCompactedfiles();
assertEquals(1, compactedFilesPostClean.size());
for (StoreFile origFile : compactedFilesPreClean) {
assertFalse(compactedFilesPostClean.contains(origFile));
}
// close the region
try {
region.close();
} catch (FailedArchiveException e) {
// expected due to errorfile
assertEquals(1, e.getFailedFiles().size());
assertEquals(ERROR_FILE, e.getFailedFiles().iterator().next().getName());
}
}
private HRegion initHRegion(HTableDescriptor htd, HRegionInfo info)
throws IOException {
Configuration conf = testUtil.getConfiguration();
Path tableDir = FSUtils.getTableDir(testDir, htd.getTableName());
Path regionDir = new Path(tableDir, info.getEncodedName());
Path storeDir = new Path(regionDir, htd.getColumnFamilies()[0].getNameAsString());
FileSystem errFS = spy(testUtil.getTestFileSystem());
// Prior to HBASE-16964, when an exception is thrown archiving any compacted file,
// none of the other files are cleared from the compactedfiles list.
// Simulate this condition with a dummy file
doThrow(new IOException("Error for test"))
.when(errFS).rename(eq(new Path(storeDir, ERROR_FILE)), any(Path.class));
HRegionFileSystem fs = new HRegionFileSystem(conf, errFS, tableDir, info);
final Configuration walConf = new Configuration(conf);
FSUtils.setRootDir(walConf, tableDir);
final WALFactory wals = new WALFactory(walConf, null, "log_" + info.getEncodedName());
HRegion region =
new HRegion(fs, wals.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace()),
conf, htd, null);
region.initialize();
return region;
}
}
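
The fault injection above works by spying the real FileSystem so that only renames of one designated source path fail while every other call passes through to the real implementation. A generalized sketch of that Mockito technique; the class and method names are illustrative, not from the commit:

import static org.mockito.Matchers.any;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.spy;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FaultyFsSketch {
  // Wrap a real FileSystem so that renaming poisonSrc always throws.
  static FileSystem failRenameFor(FileSystem realFs, Path poisonSrc) throws IOException {
    FileSystem errFS = spy(realFs);
    // Only renames of poisonSrc throw; all other calls hit the real filesystem.
    doThrow(new IOException("injected failure for test"))
        .when(errFS).rename(eq(poisonSrc), any(Path.class));
    return errFS;
  }
}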