From 06d1a00f837c461db2803d8b2940ea956561631a Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 15 Oct 2008 21:44:26 +0000 Subject: [PATCH] HBASE-930 RegionServer stuck: HLog: Could not append. Requesting close of log java.io.IOException: Could not get block locations. Aborting... git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/branches/0.18@705064 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 ++ .../apache/hadoop/hbase/regionserver/Flusher.java | 2 +- .../org/apache/hadoop/hbase/regionserver/HLog.java | 12 ++++++++++-- .../apache/hadoop/hbase/regionserver/LogRoller.java | 5 ++++- src/java/org/apache/hadoop/hbase/util/FSUtils.java | 2 -- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 1a3942ece7a..1e02185ede8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -13,6 +13,8 @@ Release 0.18.1 - Unreleased HBASE-928 NPE throwing RetriesExhaustedException HBASE-576 Investigate IPC performance; partial. HBASE-924 Update hadoop in lib on 0.18 hbase branch to 0.18.1 + HBASE-930 RegionServer stuck: HLog: Could not append. Requesting close of + log java.io.IOException: Could not get block locations. Aborting... IMPROVEMENTS HBASE-920 Make region balancing sloppier diff --git a/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java b/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java index 21081c9b783..28832571e18 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/Flusher.java @@ -178,7 +178,7 @@ class Flusher extends Thread implements FlushRequester { // is required. Currently the only way to do this is a restart of // the server. Abort because hdfs is probably bad (HBASE-644 is a case // where hdfs was bad but passed the hdfs check). - LOG.fatal("Replay of hlog required. Forcing server restart", ex); + LOG.fatal("Replay of hlog required. Forcing server shutdown", ex); server.abort(); return false; } catch (IOException ex) { diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HLog.java b/src/java/org/apache/hadoop/hbase/regionserver/HLog.java index adec3759fe5..e074d305cf6 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HLog.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HLog.java @@ -226,9 +226,10 @@ public class HLog implements HConstants { * cacheFlushLock and then completeCacheFlush could be called which would wait * for the lock on this and consequently never release the cacheFlushLock * + * @throws FailedLogCloseException * @throws IOException */ - public void rollWriter() throws IOException { + public void rollWriter() throws FailedLogCloseException, IOException { this.cacheFlushLock.lock(); try { if (closed) { @@ -237,7 +238,14 @@ public class HLog implements HConstants { synchronized (updateLock) { if (this.writer != null) { // Close the current writer, get a new one. - this.writer.close(); + try { + this.writer.close(); + } catch (IOException e) { + // Failed close of log file. Means we're losing edits. For now, + // shut ourselves down to minimize loss. Alternative is to try and + // keep going. See HBASE-930. + throw new FailedLogCloseException("#" + this.filenum, e); + } Path p = computeFilename(old_filenum); if (LOG.isDebugEnabled()) { LOG.debug("Closing current log writer " + FSUtils.getPath(p)); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java b/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java index 790a3677826..886d66e831c 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/LogRoller.java @@ -77,8 +77,11 @@ class LogRoller extends Thread implements LogRollListener { try { LOG.info("Rolling hlog. Number of entries: " + server.getLog().getNumEntries()); server.getLog().rollWriter(); + } catch (FailedLogCloseException e) { + LOG.fatal("Forcing server shutdown", e); + server.abort(); } catch (IOException ex) { - LOG.error("Log rolling failed", + LOG.error("Log rolling failed with ioe: ", RemoteExceptionHandler.checkIOException(ex)); server.checkFileSystem(); } catch (Exception ex) { diff --git a/src/java/org/apache/hadoop/hbase/util/FSUtils.java b/src/java/org/apache/hadoop/hbase/util/FSUtils.java index cf74c719962..51b493e68ba 100644 --- a/src/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/src/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -72,10 +72,8 @@ public class FSUtils { } catch (IOException e) { exception = RemoteExceptionHandler.checkIOException(e); } - try { fs.close(); - } catch (Exception e) { LOG.error("file system close failed: ", e); }