HBASE-7365 Safer table creation and deletion using .tmp dir (Matteo Bertozzi)

git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-7290@1445844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Hsieh 2013-02-13 18:53:26 +00:00
parent 56184de34d
commit 253cc4c212
9 changed files with 315 additions and 52 deletions

View File

@ -748,10 +748,13 @@ public final class HConstants {
*/
public static final String SNAPSHOT_DIR_NAME = ".snapshot";
/** Temporary directory used for table creation and deletion */
public static final String HBASE_TEMP_DIRECTORY = ".tmp";
public static final List<String> HBASE_NON_USER_TABLE_DIRS = new ArrayList<String>(
Arrays.asList(new String[] { HREGION_LOGDIR_NAME, HREGION_OLDLOGDIR_NAME, CORRUPT_DIR_NAME,
toString(META_TABLE_NAME), toString(ROOT_TABLE_NAME), SPLIT_LOGDIR_NAME,
HBCK_SIDELINEDIR_NAME, HFILE_ARCHIVE_DIRECTORY, SNAPSHOT_DIR_NAME }));
HBCK_SIDELINEDIR_NAME, HFILE_ARCHIVE_DIRECTORY, SNAPSHOT_DIR_NAME, HBASE_TEMP_DIRECTORY }));
private HConstants() {
// Can't be instantiated with this ctor.

View File

@ -107,7 +107,7 @@ public class HFileArchiver {
// make sure the regiondir lives under the tabledir
Preconditions.checkArgument(regionDir.toString().startsWith(tableDir.toString()));
Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(conf, tableDir, regionDir);
Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(rootdir, tableDir, regionDir);
LOG.debug("Have an archive directory, preparing to move files");
FileStatusConverter getAsFile = new FileStatusConverter(fs);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.catalog;
import java.io.IOException;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
@ -138,11 +139,22 @@ public class MetaEditor {
* @param d Delete to add to .META.
* @throws IOException
*/
static void deleteMetaTable(final CatalogTracker ct, final Delete d)
static void deleteFromMetaTable(final CatalogTracker ct, final Delete d)
throws IOException {
deleteFromMetaTable(ct, Arrays.asList(d));
}
/**
* Delete the passed <code>deletes</code> from the <code>.META.</code> table.
* @param ct CatalogTracker on whose back we will ride the edit.
* @param deletes Deletes to add to .META.
* @throws IOException
*/
static void deleteFromMetaTable(final CatalogTracker ct, final List<Delete> deletes)
throws IOException {
HTable t = MetaReader.getMetaHTable(ct);
try {
t.delete(d);
t.delete(deletes);
} finally {
t.close();
}
@ -318,10 +330,26 @@ public class MetaEditor {
HRegionInfo regionInfo)
throws IOException {
Delete delete = new Delete(regionInfo.getRegionName());
deleteMetaTable(catalogTracker, delete);
deleteFromMetaTable(catalogTracker, delete);
LOG.info("Deleted region " + regionInfo.getRegionNameAsString() + " from META");
}
/**
* Deletes the specified regions from META.
* @param catalogTracker
* @param regionsInfo list of regions to be deleted from META
* @throws IOException
*/
public static void deleteRegions(CatalogTracker catalogTracker,
List<HRegionInfo> regionsInfo) throws IOException {
List<Delete> deletes = new ArrayList<Delete>(regionsInfo.size());
for (HRegionInfo hri: regionsInfo) {
deletes.add(new Delete(hri.getRegionName()));
}
deleteFromMetaTable(catalogTracker, deletes);
LOG.info("Deleted from META, regions: " + regionsInfo);
}
/**
* Deletes daughters references in offlined split parent.
* @param catalogTracker
@ -335,7 +363,7 @@ public class MetaEditor {
Delete delete = new Delete(parent.getRegionName());
delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
deleteMetaTable(catalogTracker, delete);
deleteFromMetaTable(catalogTracker, delete);
LOG.info("Deleted daughters references, qualifier=" + Bytes.toStringBinary(HConstants.SPLITA_QUALIFIER) +
" and qualifier=" + Bytes.toStringBinary(HConstants.SPLITB_QUALIFIER) +
", from parent " + parent.getRegionNameAsString());

View File

@ -78,6 +78,8 @@ public class MasterFileSystem {
private final Path oldLogDir;
// root hbase directory on the FS
private final Path rootdir;
// hbase temp directory used for table construction and deletion
private final Path tempdir;
// create the split log lock
final Lock splitLogLock = new ReentrantLock();
final boolean distributedLogSplitting;
@ -96,6 +98,7 @@ public class MasterFileSystem {
// default localfs. Presumption is that rootdir is fully-qualified before
// we get to here with appropriate fs scheme.
this.rootdir = FSUtils.getRootDir(conf);
this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
// Cover both bases, the old way of setting default fs and the new.
// We're supposed to run on 0.20 and 0.21 anyways.
this.fs = this.rootdir.getFileSystem(conf);
@ -133,6 +136,9 @@ public class MasterFileSystem {
// check if the root directory exists
checkRootDir(this.rootdir, conf, this.fs);
// check if temp directory exists and clean it
checkTempDir(this.tempdir, conf, this.fs);
Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME);
// Make sure the region servers can archive their old logs
@ -180,6 +186,13 @@ public class MasterFileSystem {
return this.rootdir;
}
/**
* @return HBase temp dir.
*/
public Path getTempDir() {
return this.tempdir;
}
/**
* @return The unique identifier generated for this cluster
*/
@ -386,6 +399,32 @@ public class MasterFileSystem {
return rd;
}
/**
* Make sure the hbase temp directory exists and is empty.
* NOTE that this method is only executed once just after the master becomes the active one.
*/
private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs)
throws IOException {
// If the temp directory exists, clear the content (left over, from the previous run)
if (fs.exists(tmpdir)) {
// Archive table in temp, maybe left over from failed deletion,
// if not the cleaner will take care of them.
for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) {
for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) {
HFileArchiver.archiveRegion(c, fs, this.rootdir, tabledir, regiondir);
}
}
if (!fs.delete(tmpdir, true)) {
throw new IOException("Unable to clean the temp directory: " + tmpdir);
}
}
// Create the temp directory
if (!fs.mkdirs(tmpdir)) {
throw new IOException("HBase temp directory '" + tmpdir + "' creation failure.");
}
}
private static void bootstrap(final Path rd, final Configuration c)
throws IOException {
LOG.info("BOOTSTRAP: creating ROOT and first META regions");
@ -450,6 +489,37 @@ public class MasterFileSystem {
fs.delete(new Path(rootdir, Bytes.toString(tableName)), true);
}
/**
* Move the specified file/directory to the hbase temp directory.
* @param path The path of the file/directory to move
* @return The temp location of the file/directory moved
* @throws IOException in case of file-system failure
*/
public Path moveToTemp(final Path path) throws IOException {
Path tempPath = new Path(this.tempdir, path.getName());
// Ensure temp exists
if (!fs.exists(tempdir) && !fs.mkdirs(tempdir)) {
throw new IOException("HBase temp directory '" + tempdir + "' creation failure.");
}
if (!fs.rename(path, tempPath)) {
throw new IOException("Unable to move '" + path + "' to temp '" + tempPath + "'");
}
return tempPath;
}
/**
* Move the specified table to the hbase temp directory
* @param tableName Table name to move
* @return The temp location of the table moved
* @throws IOException in case of file-system failure
*/
public Path moveTableToTemp(byte[] tableName) throws IOException {
return moveToTemp(HTableDescriptor.getTableDir(this.rootdir, tableName));
}
public void updateRegionInfo(HRegionInfo region) {
// TODO implement this. i think this is currently broken in trunk i don't
// see this getting updated.

View File

@ -19,12 +19,24 @@
package org.apache.hadoop.hbase.master.handler;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
@ -38,8 +50,9 @@ import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.ModifyRegionUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.zookeeper.KeeperException;
/**
@ -48,11 +61,11 @@ import org.apache.zookeeper.KeeperException;
@InterfaceAudience.Private
public class CreateTableHandler extends EventHandler {
private static final Log LOG = LogFactory.getLog(CreateTableHandler.class);
protected final MasterFileSystem fileSystemManager;
protected final HTableDescriptor hTableDescriptor;
protected final Configuration conf;
protected final AssignmentManager assignmentManager;
protected final CatalogTracker catalogTracker;
private MasterFileSystem fileSystemManager;
private final HTableDescriptor hTableDescriptor;
private Configuration conf;
private final AssignmentManager assignmentManager;
private final CatalogTracker catalogTracker;
private final HRegionInfo [] newRegions;
public CreateTableHandler(Server server, MasterFileSystem fileSystemManager,
@ -76,7 +89,9 @@ public class CreateTableHandler extends EventHandler {
}
} catch (InterruptedException e) {
LOG.warn("Interrupted waiting for meta availability", e);
throw new IOException(e);
InterruptedIOException ie = new InterruptedIOException(e.getMessage());
ie.initCause(e);
throw ie;
}
String tableName = this.hTableDescriptor.getNameAsString();
@ -131,40 +146,124 @@ public class CreateTableHandler extends EventHandler {
}
}
private void handleCreateTable(String tableName) throws IOException,
KeeperException {
// 1. Create table descriptor on disk
// TODO: Currently we make the table descriptor and as side-effect the
// tableDir is created. Should we change below method to be createTable
// where we create table in tmp dir with its table descriptor file and then
// do rename to move it into place?
FSTableDescriptors.createTableDescriptor(this.hTableDescriptor, this.conf);
/**
* Responsible of table creation (on-disk and META) and assignment.
* - Create the table directory and descriptor (temp folder)
* - Create the on-disk regions (temp folder)
* [If something fails here: we've just some trash in temp]
* - Move the table from temp to the root directory
* [If something fails here: we've the table in place but some of the rows required
* present in META. (hbck needed)]
* - Add regions to META
* [If something fails here: we don't have regions assigned: table disabled]
* - Assign regions to Region Servers
* [If something fails here: we still have the table in disabled state]
* - Update ZooKeeper with the enabled state
*/
private void handleCreateTable(String tableName) throws IOException, KeeperException {
Path tempdir = fileSystemManager.getTempDir();
FileSystem fs = fileSystemManager.getFileSystem();
// 2. Create regions
List<HRegionInfo> regions = handleCreateRegions(tableName);
if (regions != null && regions.size() > 0) {
// 3. Trigger immediate assignment of the regions in round-robin fashion
ModifyRegionUtils.assignRegions(assignmentManager, regions);
// 1. Create Table Descriptor
FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor);
Path tempTableDir = new Path(tempdir, tableName);
Path tableDir = new Path(fileSystemManager.getRootDir(), tableName);
// 2. Create Regions
List<HRegionInfo> regionInfos = handleCreateHdfsRegions(tempdir, tableName);
// 3. Move Table temp directory to the hbase root location
if (!fs.rename(tempTableDir, tableDir)) {
throw new IOException("Unable to move table from temp=" + tempTableDir +
" to hbase root=" + tableDir);
}
// 4. Set table enabled flag up in zk.
if (regionInfos != null && regionInfos.size() > 0) {
// 4. Add regions to META
MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos);
// 5. Trigger immediate assignment of the regions in round-robin fashion
try {
assignmentManager.getZKTable().
setEnabledTable(this.hTableDescriptor.getNameAsString());
assignmentManager.getRegionStates().createRegionStates(regionInfos);
assignmentManager.assign(regionInfos);
} catch (InterruptedException e) {
LOG.error("Caught " + e + " during round-robin assignment");
InterruptedIOException ie = new InterruptedIOException(e.getMessage());
ie.initCause(e);
throw ie;
}
}
// 6. Set table enabled flag up in zk.
try {
assignmentManager.getZKTable().setEnabledTable(tableName);
} catch (KeeperException e) {
throw new IOException("Unable to ensure that the table will be" +
" enabled because of a ZooKeeper issue", e);
}
}
protected List<HRegionInfo> handleCreateRegions(String tableName) throws IOException {
// 1. create regions
List<HRegionInfo> regions = ModifyRegionUtils.createRegions(conf, fileSystemManager.getRootDir(),
hTableDescriptor, newRegions, catalogTracker);
if (regions != null && regions.size() > 0) {
// 2. add regions to .META.
MetaEditor.addRegionsToMeta(catalogTracker, regions);
/**
* Create the on-disk structure for the table, and returns the regions info.
* @param rootdir directory where the table is being created
* @param tableName name of the table under construction
* @return the list of regions created
*/
protected List<HRegionInfo> handleCreateHdfsRegions(final Path rootdir, final String tableName)
throws IOException {
int regionNumber = newRegions.length;
ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(
"RegionOpenAndInitThread-" + tableName, regionNumber);
CompletionService<HRegion> completionService = new ExecutorCompletionService<HRegion>(
regionOpenAndInitThreadPool);
List<HRegionInfo> regionInfos = new ArrayList<HRegionInfo>();
for (final HRegionInfo newRegion : newRegions) {
completionService.submit(new Callable<HRegion>() {
public HRegion call() throws IOException {
// 1. Create HRegion
HRegion region = HRegion.createHRegion(newRegion,
rootdir, conf, hTableDescriptor, null,
false, true);
// 2. Close the new region to flush to disk. Close log file too.
region.close();
return region;
}
return regions;
});
}
try {
// 3. wait for all regions to finish creation
for (int i = 0; i < regionNumber; i++) {
Future<HRegion> future = completionService.take();
HRegion region = future.get();
regionInfos.add(region.getRegionInfo());
}
} catch (InterruptedException e) {
throw new InterruptedIOException(e.getMessage());
} catch (ExecutionException e) {
throw new IOException(e.getCause());
} finally {
regionOpenAndInitThreadPool.shutdownNow();
}
return regionInfos;
}
protected ThreadPoolExecutor getRegionOpenAndInitThreadPool(
final String threadNamePrefix, int regionNumber) {
int maxThreads = Math.min(regionNumber, conf.getInt(
"hbase.hregion.open.and.init.threads.max", 10));
ThreadPoolExecutor openAndInitializeThreadPool = Threads
.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
new ThreadFactory() {
private int count = 1;
public Thread newThread(Runnable r) {
Thread t = new Thread(r, threadNamePrefix + "-" + count++);
return t;
}
});
return openAndInitializeThreadPool;
}
}

View File

@ -24,12 +24,16 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
@ -55,6 +59,8 @@ public class DeleteTableHandler extends TableEventHandler {
if (cpHost != null) {
cpHost.preDeleteTableHandler(this.tableName);
}
// 1. Wait because of region in transition
AssignmentManager am = this.masterServices.getAssignmentManager();
long waitTime = server.getConfiguration().
getLong("hbase.master.wait.on.region", 5 * 60 * 1000);
@ -71,21 +77,32 @@ public class DeleteTableHandler extends TableEventHandler {
waitTime + "ms) for region to leave region " +
region.getRegionNameAsString() + " in transitions");
}
LOG.debug("Deleting region " + region.getRegionNameAsString() +
" from META and FS");
// Remove region from META
MetaEditor.deleteRegion(this.server.getCatalogTracker(), region);
// Delete region from FS
this.masterServices.getMasterFileSystem().deleteRegion(region);
}
// Delete table from FS
this.masterServices.getMasterFileSystem().deleteTable(tableName);
// Update table descriptor cache
// 2. Remove regions from META
LOG.debug("Deleting regions from META");
MetaEditor.deleteRegions(this.server.getCatalogTracker(), regions);
// 3. Move the table in /hbase/.tmp
MasterFileSystem mfs = this.masterServices.getMasterFileSystem();
Path tempTableDir = mfs.moveTableToTemp(tableName);
// 4. Update table descriptor cache
this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName));
// If entry for this table in zk, and up in AssignmentManager, remove it.
// 5. If entry for this table in zk, and up in AssignmentManager, remove it.
am.getZKTable().setDeletedTable(Bytes.toString(tableName));
// 6. Delete regions from FS (temp directory)
FileSystem fs = mfs.getFileSystem();
for (HRegionInfo hri: regions) {
LOG.debug("Deleting region " + hri.getRegionNameAsString() + " from FS");
HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, mfs.getRootDir(),
tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
}
// 7. Delete table from FS (temp directory)
fs.delete(tempTableDir, true);
if (cpHost != null) {
cpHost.postDeleteTableHandler(this.tableName);
}

View File

@ -98,6 +98,24 @@ public class HFileArchiveUtil {
return HRegion.getRegionDir(archiveDir, encodedRegionName);
}
/**
* Get the archive directory for a given region under the specified table
* @param rootdir {@link Path} to the root directory where hbase files are stored (for building
* the archive path)
* @param tabledir the original table directory. Cannot be null.
* @param regiondir the path to the region directory. Cannot be null.
* @return {@link Path} to the directory to archive the given region, or <tt>null</tt> if it
* should not be archived
*/
public static Path getRegionArchiveDir(Path rootdir, Path tabledir, Path regiondir) {
// get the archive directory for a table
Path archiveDir = getTableArchivePath(rootdir, tabledir.getName());
// then add on the region path under the archive
String encodedRegionName = regiondir.getName();
return HRegion.getRegionDir(archiveDir, encodedRegionName);
}
/**
* Get the path to the table archive directory based on the configured archive directory.
* <p>
@ -109,7 +127,22 @@ public class HFileArchiveUtil {
*/
public static Path getTableArchivePath(Path tabledir) {
Path root = tabledir.getParent();
return new Path(new Path(root,HConstants.HFILE_ARCHIVE_DIRECTORY), tabledir.getName());
return getTableArchivePath(root, tabledir.getName());
}
/**
* Get the path to the table archive directory based on the configured archive directory.
* <p>
* Get the path to the table's archive directory.
* <p>
* Generally of the form: /hbase/.archive/[tablename]
* @param rootdir {@link Path} to the root directory where hbase files are stored (for building
* the archive path)
* @param tableName Name of the table to be archived. Cannot be null.
* @return {@link Path} to the archive directory for the table
*/
public static Path getTableArchivePath(final Path rootdir, final String tableName) {
return new Path(getArchivePath(rootdir), tableName);
}
/**
@ -133,6 +166,16 @@ public class HFileArchiveUtil {
* @throws IOException if an unexpected error occurs
*/
public static Path getArchivePath(Configuration conf) throws IOException {
return new Path(FSUtils.getRootDir(conf), HConstants.HFILE_ARCHIVE_DIRECTORY);
return getArchivePath(FSUtils.getRootDir(conf));
}
/**
* Get the full path to the archive directory on the configured {@link FileSystem}
* @param rootdir {@link Path} to the root directory where hbase files are stored (for building
* the archive path)
* @return the full {@link Path} to the archive directory, as defined by the configuration
*/
private static Path getArchivePath(final Path rootdir) {
return new Path(rootdir, HConstants.HFILE_ARCHIVE_DIRECTORY);
}
}

View File

@ -108,6 +108,8 @@ public abstract class ModifyRegionUtils {
HRegion region = HRegion.createHRegion(newRegion,
rootDir, conf, hTableDescriptor, null,
false, true);
HRegion.writeRegioninfoOnFilesystem(region.getRegionInfo(), region.getRegionDir(),
region.getFilesystem(), conf);
try {
// 2. Custom user code to interact with the created region
if (task != null) {

View File

@ -50,9 +50,10 @@ public class TestHFileArchiveUtil {
@Test
public void testRegionArchiveDir() {
Configuration conf = null;
Path tableDir = new Path("table");
Path regionDir = new Path("region");
assertNotNull(HFileArchiveUtil.getRegionArchiveDir(null, tableDir, regionDir));
assertNotNull(HFileArchiveUtil.getRegionArchiveDir(conf, tableDir, regionDir));
}
@Test