From 42ae68691241a2789137272586989acbcfd34f35 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Tue, 9 Aug 2011 03:47:45 +0000 Subject: [PATCH] HBASE-4169 FSUtils LeaseRecovery for non HDFS FileSystems git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1155200 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../hbase/regionserver/wal/HLogSplitter.java | 4 +- .../apache/hadoop/hbase/util/FSHDFSUtils.java | 110 ++++++++++++++++++ .../apache/hadoop/hbase/util/FSMapRUtils.java | 42 +++++++ .../org/apache/hadoop/hbase/util/FSUtils.java | 105 ++++------------- .../hadoop/hbase/util/RegionSplitter.java | 3 +- .../hbase/regionserver/wal/TestHLog.java | 3 +- 7 files changed, 183 insertions(+), 85 deletions(-) create mode 100644 src/main/java/org/apache/hadoop/hbase/util/FSMapRUtils.java diff --git a/CHANGES.txt b/CHANGES.txt index 9a7c4400c11..dfe5ae92f35 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -363,6 +363,7 @@ Release 0.91.0 - Unreleased HBASE-4158 Upgrade pom.xml to surefire 2.9 (Aaron Kushner & Mikhail) HBASE-3899 Add ability for delayed RPC calls to set return value immediately at call return. (Vlad Dogaru via todd) + HBASE-4169 FSUtils LeaseRecovery for non HDFS FileSystems (Lohit Vijayarenu) TASKS HBASE-3559 Move report of split to master OFF the heartbeat channel diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java index 2c7571ca91f..9d87fa54201 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogSplitter.java @@ -19,8 +19,6 @@ */ package org.apache.hadoop.hbase.regionserver.wal; -import static org.apache.hadoop.hbase.util.FSUtils.recoverFileLease; - import java.io.EOFException; import java.io.IOException; import java.io.InterruptedIOException; @@ -688,7 +686,7 @@ public class HLogSplitter { } try { - recoverFileLease(fs, path, conf); + FSUtils.getInstance(fs, conf).recoverFileLease(fs, path, conf); try { in = getReader(fs, path, conf); } catch (EOFException e) { diff --git a/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java index e69de29bb2d..dcd093776dc 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java +++ b/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.lang.reflect.InvocationTargetException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
+import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
+
+
+/**
+ * FSUtils implementation for HDFS.
+ */
+public class FSHDFSUtils extends FSUtils {
+  private static final Log LOG = LogFactory.getLog(FSHDFSUtils.class);
+
+  public void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
+  throws IOException {
+    if (!isAppendSupported(conf)) {
+      LOG.warn("Running on HDFS without append enabled may result in data loss");
+      return;
+    }
+    // lease recovery not needed for local file system case.
+    // currently, local file system doesn't implement append either.
+    if (!(fs instanceof DistributedFileSystem)) {
+      return;
+    }
+    LOG.info("Recovering file " + p);
+    long startWaiting = System.currentTimeMillis();
+
+    // Trying recovery
+    boolean recovered = false;
+    while (!recovered) {
+      try {
+        try {
+          if (fs instanceof DistributedFileSystem) {
+            DistributedFileSystem dfs = (DistributedFileSystem)fs;
+            DistributedFileSystem.class.getMethod("recoverLease",
+              new Class[] {Path.class}).invoke(dfs, p);
+          } else {
+            throw new Exception("Not a DistributedFileSystem");
+          }
+        } catch (InvocationTargetException ite) {
+          // function was properly called, but threw its own exception
+          throw (IOException) ite.getCause();
+        } catch (Exception e) {
+          LOG.debug("Failed fs.recoverLease invocation, " + e.toString() +
+            ", trying fs.append instead");
+          FSDataOutputStream out = fs.append(p);
+          out.close();
+        }
+        recovered = true;
+      } catch (IOException e) {
+        e = RemoteExceptionHandler.checkIOException(e);
+        if (e instanceof AlreadyBeingCreatedException) {
+          // We expect that we'll get this message while the lease is still
+          // within its soft limit, but if we get it past that, it means
+          // that the RS is holding onto the file even though it lost its
+          // znode. We could potentially abort after some time here.
+          long waitedFor = System.currentTimeMillis() - startWaiting;
+          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
+              ":" + e.getMessage());
+          }
+        } else if (e instanceof LeaseExpiredException &&
+            e.getMessage().contains("File does not exist")) {
+          // This exception comes out instead of FNFE, fix it
+          throw new FileNotFoundException(
+            "The given HLog wasn't found at " + p.toString());
+        } else {
+          throw new IOException("Failed to open " + p + " for append", e);
+        }
+      }
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException ex) {
+        throw (InterruptedIOException) new InterruptedIOException().initCause(ex);
+      }
+    }
+    LOG.info("Finished lease recovery attempt for " + p);
+  }
+}
diff --git a/src/main/java/org/apache/hadoop/hbase/util/FSMapRUtils.java b/src/main/java/org/apache/hadoop/hbase/util/FSMapRUtils.java
new file mode 100644
index 00000000000..edca7f4d5ef
--- /dev/null
+++ b/src/main/java/org/apache/hadoop/hbase/util/FSMapRUtils.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * MapR implementation of FSUtils.
+ */
+public class FSMapRUtils extends FSUtils {
+  private static final Log LOG = LogFactory.getLog(FSMapRUtils.class);
+
+  public void recoverFileLease(final FileSystem fs, final Path p,
+      Configuration conf) throws IOException {
+    LOG.info("Recovering file " + p.toString() +
+      " by changing permission to readonly");
+    FsPermission roPerm = new FsPermission((short) 0444);
+    fs.setPermission(p, roPerm);
+  }
+}
diff --git a/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java b/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
index 81978d212de..07294104cf2 100644
--- a/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
+++ b/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java
@@ -35,18 +35,15 @@ import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
-import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
 
 import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -57,15 +54,25 @@ import java.util.Map;
 /**
  * Utility methods for interacting with the underlying file system.
  */
-public class FSUtils {
+public abstract class FSUtils {
   private static final Log LOG = LogFactory.getLog(FSUtils.class);
 
-  /**
-   * Not instantiable
-   */
-  private FSUtils() {
+  protected FSUtils() {
     super();
   }
+
+  public static FSUtils getInstance(FileSystem fs, Configuration conf) {
+    String scheme = fs.getUri().getScheme();
+    if (scheme == null) {
+      LOG.warn("Could not find scheme for uri " +
+          fs.getUri() + ", default to hdfs");
+      scheme = "hdfs";
+    }
+    Class<?> fsUtilsClass = conf.getClass("hbase.fsutil." +
+        scheme + ".impl", FSHDFSUtils.class); // Default to HDFS impl
+    FSUtils fsUtils = (FSUtils)ReflectionUtils.newInstance(fsUtilsClass, conf);
+    return fsUtils;
+  }
 
   /**
    * Delete if exists.
@@ -768,79 +775,17 @@
     return scheme.equalsIgnoreCase("hdfs");
   }
 
-  /*
-   * Recover file lease. Used when a file might be suspect to be had been left open by another process. p
-   * @param fs
-   * @param p
-   * @param append True if append supported
+  /**
+   * Recover file lease. Used when a file may have been
+   * left open by another process.
+   * @param fs FileSystem handle
+   * @param p Path of file to recover lease
+   * @param conf Configuration handle
+   * @throws IOException
+   */
-  public static void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
-  throws IOException{
-    if (!isAppendSupported(conf)) {
-      LOG.warn("Running on HDFS without append enabled may result in data loss");
-      return;
-    }
-    // lease recovery not needed for local file system case.
-    // currently, local file system doesn't implement append either.
- if (!(fs instanceof DistributedFileSystem)) { - return; - } - LOG.info("Recovering file " + p); - long startWaiting = System.currentTimeMillis(); - - // Trying recovery - boolean recovered = false; - while (!recovered) { - try { - try { - if (fs instanceof DistributedFileSystem) { - DistributedFileSystem dfs = (DistributedFileSystem)fs; - DistributedFileSystem.class.getMethod("recoverLease", - new Class[] {Path.class}).invoke(dfs, p); - } else { - throw new Exception("Not a DistributedFileSystem"); - } - } catch (InvocationTargetException ite) { - // function was properly called, but threw it's own exception - throw (IOException) ite.getCause(); - } catch (Exception e) { - LOG.debug("Failed fs.recoverLease invocation, " + e.toString() + - ", trying fs.append instead"); - FSDataOutputStream out = fs.append(p); - out.close(); - } - recovered = true; - } catch (IOException e) { - e = RemoteExceptionHandler.checkIOException(e); - if (e instanceof AlreadyBeingCreatedException) { - // We expect that we'll get this message while the lease is still - // within its soft limit, but if we get it past that, it means - // that the RS is holding onto the file even though it lost its - // znode. We could potentially abort after some time here. - long waitedFor = System.currentTimeMillis() - startWaiting; - if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) { - LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p + - ":" + e.getMessage()); - } - } else if (e instanceof LeaseExpiredException && - e.getMessage().contains("File does not exist")) { - // This exception comes out instead of FNFE, fix it - throw new FileNotFoundException( - "The given HLog wasn't found at " + p.toString()); - } else { - throw new IOException("Failed to open " + p + " for append", e); - } - } - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - new InterruptedIOException().initCause(ex); - } - } - LOG.info("Finished lease recover attempt for " + p); - } - + public abstract void recoverFileLease(final FileSystem fs, final Path p, + Configuration conf) throws IOException; + /** * @param fs * @param rootdir diff --git a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java index 8ded4b53c4f..a0bef347d19 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java +++ b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java @@ -651,7 +651,8 @@ public class RegionSplitter { fs.rename(tmpFile, splitFile); } else { LOG.debug("_balancedSplit file found. Replay log to restore state..."); - FSUtils.recoverFileLease(fs, splitFile, table.getConfiguration()); + FSUtils.getInstance(fs, table.getConfiguration()) + .recoverFileLease(fs, splitFile, table.getConfiguration()); // parse split file and process remaining splits FSDataInputStream tmpIn = fs.open(splitFile); diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java index 81142321117..6d89e66f083 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java @@ -408,7 +408,8 @@ public class TestHLog { public Exception exception = null; public void run() { try { - FSUtils.recoverFileLease(recoveredFs, walPath, rlConf); + FSUtils.getInstance(fs, rlConf) + .recoverFileLease(recoveredFs, walPath, rlConf); } catch (IOException e) { exception = e; }
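
Note on the extension point this patch adds: FSUtils.getInstance(fs, conf)
selects an FSUtils implementation from the FileSystem's URI scheme via the
configuration key "hbase.fsutil.<scheme>.impl", defaulting to FSHDFSUtils
(and to the "hdfs" scheme when the URI carries no scheme). A filesystem other
than HDFS or MapR can therefore supply its own lease recovery without touching
HBase code. A minimal sketch follows; the "foo" scheme and the FSFooUtils
class are hypothetical, shown only to illustrate the hook:

package org.apache.hadoop.hbase.util;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Hypothetical FSUtils implementation for a filesystem whose URIs use the
 * "foo" scheme. Not part of this patch.
 */
public class FSFooUtils extends FSUtils {
  @Override
  public void recoverFileLease(final FileSystem fs, final Path p,
      Configuration conf) throws IOException {
    // Scheme-specific recovery goes here. A filesystem without HDFS-style
    // leases might, like FSMapRUtils, fence out the previous writer with a
    // cheap metadata operation instead of recovering a lease.
  }
}

Wiring it in is one configuration entry; note that ReflectionUtils.newInstance
needs the implementation to have a no-argument constructor (the default for
subclasses, given FSUtils's protected constructor). With fs and path being the
FileSystem and Path in hand:

  Configuration conf = HBaseConfiguration.create();
  conf.set("hbase.fsutil.foo.impl", "org.apache.hadoop.hbase.util.FSFooUtils");
  FSUtils.getInstance(fs, conf).recoverFileLease(fs, path, conf);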