diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java index 92e7b3c00e9..6e39517fe4e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/FSHLog.java @@ -1752,18 +1752,11 @@ public class FSHLog implements WAL { } // TODO: Check size and if big go ahead and call a sync if we have enough data. - - // If not a batch, return to consume more events from the ring buffer before proceeding; - // we want to get up a batch of syncs and appends before we go do a filesystem sync. - if (!endOfBatch || this.syncFuturesCount <= 0) return; - - // Now we have a batch. - - if (LOG.isTraceEnabled()) { - LOG.trace("Sequence=" + sequence + ", syncCount=" + this.syncFuturesCount); - } - + // This is a sync. If existing exception, fall through. Else look to see if batch. if (this.exception == null) { + // If not a batch, return to consume more events from the ring buffer before proceeding; + // we want to get up a batch of syncs and appends before we go do a filesystem sync. + if (!endOfBatch || this.syncFuturesCount <= 0) return; // Below expects that the offer 'transfers' responsibility for the outstanding syncs to // the syncRunner. We should never get an exception in here. this.syncRunnerIndex = (this.syncRunnerIndex + 1) % this.syncRunners.length; @@ -1779,7 +1772,9 @@ public class FSHLog implements WAL { // We may have picked up an exception above trying to offer sync if (this.exception != null) { cleanupOutstandingSyncsOnException(sequence, - new DamagedWALException("On sync", this.exception)); + this.exception instanceof DamagedWALException? + this.exception: + new DamagedWALException("On sync", this.exception)); } attainSafePoint(sequence); this.syncFuturesCount = 0; @@ -1874,8 +1869,7 @@ public class FSHLog implements WAL { // Update metrics. postAppend(entry, EnvironmentEdgeManager.currentTime() - start); } catch (Exception e) { - String msg = "Append sequenceId=" + regionSequenceId + - ", requesting roll of WAL"; + String msg = "Append sequenceId=" + regionSequenceId + ", requesting roll of WAL"; LOG.warn(msg, e); requestLogRoll(); throw new DamagedWALException(msg, e); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALLockup.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALLockup.java index bc8dc3d58a8..56c6c602f6b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALLockup.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestWALLockup.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -103,7 +104,7 @@ public class TestWALLockup { *
First I need to set up some mocks for Server and RegionServerServices. I also need to * set up a dodgy WAL that will throw an exception when we go to append to it. */ - @Test (timeout=15000) + @Test (timeout=20000) public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException { // A WAL that we can have throw exceptions when a flag is set. class DodgyFSLog extends FSHLog { @@ -127,7 +128,13 @@ public class TestWALLockup { if (throwException) { try { LOG.info("LATCHED"); - this.latch.await(); + // So, timing can have it that the test can run and the bad flush below happens + // before we get here. In this case, we'll be stuck waiting on this latch but there + // is nothing in the WAL pipeline to get us to the below beforeWaitOnSafePoint... + // because all WALs have rolled. In this case, just give up on test. + if (!this.latch.await(5, TimeUnit.SECONDS)) { + LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!"); + } } catch (InterruptedException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -222,8 +229,6 @@ public class TestWALLockup { dodgyWAL.throwException = true; // This append provokes a WAL roll request dodgyWAL.append(htd, region.getRegionInfo(), key, edit, true); - // Now wait until the dodgy WAL is latched. - while (dodgyWAL.latch.getCount() <= 0) Threads.sleep(1); boolean exception = false; try { dodgyWAL.sync();