diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
index a5d88381748..3426e51a636 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
@@ -133,6 +133,7 @@ public class HLog implements Syncable {
   private final String prefix;
   private final AtomicLong unflushedEntries = new AtomicLong(0);
   private volatile long syncedTillHere = 0;
+  private long lastDeferredTxid;
   private final Path oldLogDir;
   private boolean logRollRunning;

@@ -196,6 +197,7 @@ public class HLog implements Syncable {

   //number of transactions in the current Hlog.
   private final AtomicInteger numEntries = new AtomicInteger(0);
+
   // If live datanode count is lower than the default replicas value,
   // RollWriter will be triggered in each sync(So the RollWriter will be
   // triggered one by one in a short time). Using it as a workaround to slow
@@ -813,9 +815,12 @@ public class HLog implements Syncable {
     } catch (IOException e) {
       LOG.error("Failed close of HLog writer", e);
       int errors = closeErrorCount.incrementAndGet();
-      if (errors <= closeErrorsTolerated) {
+      if (errors <= closeErrorsTolerated && !hasDeferredEntries()) {
         LOG.warn("Riding over HLog close failure! error count="+errors);
       } else {
+        if (hasDeferredEntries()) {
+          LOG.error("Aborting due to unflushed edits in HLog");
+        }
         // Failed close of log file. Means we're losing edits. For now,
         // shut ourselves down to minimize loss. Alternative is to try and
         // keep going. See HBASE-930.
@@ -990,6 +995,9 @@ public class HLog implements Syncable {
       doWrite(regionInfo, logKey, logEdit, htd);
       txid = this.unflushedEntries.incrementAndGet();
       this.numEntries.incrementAndGet();
+      if (htd.isDeferredLogFlush()) {
+        lastDeferredTxid = txid;
+      }
     }

     // Sync if catalog region, and if not then check if that table supports
@@ -1068,6 +1076,9 @@ public class HLog implements Syncable {
       doWrite(info, logKey, edits, htd);
       this.numEntries.incrementAndGet();
       txid = this.unflushedEntries.incrementAndGet();
+      if (htd.isDeferredLogFlush()) {
+        lastDeferredTxid = txid;
+      }
     }
     // Sync if catalog region, and if not then check if that table supports
     // deferred log flushing
@@ -1789,6 +1800,11 @@ public class HLog implements Syncable {
     return coprocessorHost;
   }

+  /** Provide access to currently deferred sequence num for tests */
+  boolean hasDeferredEntries() {
+    return lastDeferredTxid > syncedTillHere;
+  }
+
   /**
    * Pass one or more log file names and it will either dump out a text version
    * on stdout or split the specified log files.
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRollAbort.java b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRollAbort.java
new file mode 100644
index 00000000000..717e7472d3d
--- /dev/null
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRollAbort.java
@@ -0,0 +1,152 @@
+package org.apache.hadoop.hbase.regionserver.wal;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
+import org.apache.log4j.Level;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests for conditions that should trigger RegionServer aborts when
+ * rolling the current HLog fails.
+ */
+public class TestLogRollAbort {
+  private static final Log LOG = LogFactory.getLog(TestLogRollAbort.class);
+  private static MiniDFSCluster dfsCluster;
+  private static HBaseAdmin admin;
+  private static MiniHBaseCluster cluster;
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  // verbose logging on classes that are touched in these tests
+  {
+    ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
+        .getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)HRegionServer.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)HRegion.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
+  }
+
+  // Set up the configuration here, before it is handed to the HDFS & HBase
+  // mini-clusters at startup.
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    // Tweak default timeout values down for faster recovery
+    TEST_UTIL.getConfiguration().setInt(
+        "hbase.regionserver.logroll.errors.tolerated", 2);
+    TEST_UTIL.getConfiguration().setInt("ipc.ping.interval", 10 * 1000);
+    TEST_UTIL.getConfiguration().setInt("ipc.socket.timeout", 10 * 1000);
+    TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
+
+    // Increase the amount of time between client retries
+    TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000);
+
+    // make sure log.hflush() calls syncFs() to open a pipeline
+    TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
+    // lower the namenode & datanode heartbeat so the namenode
+    // quickly detects datanode failures
+    TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
+    TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
+    // the namenode might still try to choose the recently-dead datanode
+    // for a pipeline, so retry setting up a new pipeline multiple times
+    TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10);
+    // set periodic sync to 2 min so it doesn't run during the test
+    TEST_UTIL.getConfiguration().setInt("hbase.regionserver.optionallogflushinterval",
+        120 * 1000);
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    TEST_UTIL.startMiniCluster(2);
+
+    cluster = TEST_UTIL.getHBaseCluster();
+    dfsCluster = TEST_UTIL.getDFSCluster();
+    admin = TEST_UTIL.getHBaseAdmin();
+
+    // disable region rebalancing (interferes with log watching)
+    cluster.getMaster().balanceSwitch(false);
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Tests that the RegionServer aborts if we hit an error closing the WAL when
+   * there are unsynced WAL edits. See HBASE-4282.
+   */
+  @Test
+  public void testRSAbortWithUnflushedEdits() throws Exception {
+    LOG.info("Starting testRSAbortWithUnflushedEdits()");
+
+    // When the META table can be opened, the region servers are running
+    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
+
+    // Create the test table and open it
+    String tableName = this.getClass().getSimpleName();
+    HTableDescriptor desc = new HTableDescriptor(tableName);
+    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
+    desc.setDeferredLogFlush(true);
+
+    admin.createTable(desc);
+    HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
+
+    HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
+    HLog log = server.getWAL();
+
+    assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());
+    // don't run this test without append support (HDFS-200 & HDFS-142)
+    assertTrue("Need append support for this test",
+        FSUtils.isAppendSupported(TEST_UTIL.getConfiguration()));
+
+    Put p = new Put(Bytes.toBytes("row2001"));
+    p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
+    table.put(p);
+
+    log.sync();
+
+    p = new Put(Bytes.toBytes("row2002"));
+    p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
+    table.put(p);
+
+    dfsCluster.restartDataNodes();
+    LOG.info("Restarted datanodes");
+
+    assertTrue("Should have an outstanding WAL edit", log.hasDeferredEntries());
+    try {
+      log.rollWriter(true);
+      fail("Log roll should have triggered FailedLogCloseException");
+    } catch (FailedLogCloseException flce) {
+      assertTrue("Should have deferred flush log edits outstanding",
+          log.hasDeferredEntries());
+    }
+  }
+}
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
index f4cedf4d139..041e75712f4 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@@ -462,11 +463,10 @@

     // roll all datanodes in the pipeline
     dfsCluster.restartDataNodes();
-    Thread.sleep(10000);
+    Thread.sleep(1000);
     dfsCluster.waitActive();
     LOG.info("Data Nodes restarted");

-    //this.log.sync();
     // this write should succeed, but trigger a log roll
     writeData(table, 1003);
     long newFilenum = log.getFilenum();
@@ -474,12 +474,11 @@
     assertTrue("Missing datanode should've triggered a log roll",
         newFilenum > oldFilenum && newFilenum > curTime);

-    //this.log.sync();
     writeData(table, 1004);

     // roll all datanode again
     dfsCluster.restartDataNodes();
-    Thread.sleep(10000);
+    Thread.sleep(1000);
     dfsCluster.waitActive();
     LOG.info("Data Nodes restarted");

@@ -536,5 +535,11 @@
     } finally {
       scanner.close();
     }
+
+    // verify that no region servers aborted
+    for (JVMClusterUtil.RegionServerThread rsThread:
+        TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
+      assertFalse(rsThread.getRegionServer().isAborted());
+    }
   }
 }
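For reference, the heart of the HLog change above is the interplay between lastDeferredTxid and syncedTillHere: every append to a deferred-log-flush table records its transaction id, and a failed writer close may only be "ridden over" while no such edit is still unsynced. The standalone Java sketch below restates that bookkeeping outside of HBase; the class and method names (DeferredEditTracker, canRideOverCloseFailure) are illustrative only and are not part of the patch or of any HBase API.

import java.util.concurrent.atomic.AtomicLong;

// Minimal sketch of the deferred-edit bookkeeping added to HLog in the patch above.
// Names are hypothetical; only the logic mirrors the patch.
public class DeferredEditTracker {
  private final AtomicLong unflushedEntries = new AtomicLong(0);
  private volatile long syncedTillHere = 0;
  private volatile long lastDeferredTxid = 0;

  /** Record an append; deferred-flush tables remember the transaction id. */
  public long append(boolean deferredLogFlush) {
    long txid = unflushedEntries.incrementAndGet();
    if (deferredLogFlush) {
      lastDeferredTxid = txid;
    }
    return txid;
  }

  /** Record that all edits up to and including txid have been synced. */
  public void sync(long txid) {
    if (txid > syncedTillHere) {
      syncedTillHere = txid;
    }
  }

  /** True if a deferred-flush edit was appended but never synced. */
  public boolean hasDeferredEntries() {
    return lastDeferredTxid > syncedTillHere;
  }

  /**
   * Decide whether a failed log close can be tolerated: only when the error
   * budget is not exhausted and no deferred edits would be lost.
   */
  public boolean canRideOverCloseFailure(int errors, int closeErrorsTolerated) {
    return errors <= closeErrorsTolerated && !hasDeferredEntries();
  }

  public static void main(String[] args) {
    DeferredEditTracker tracker = new DeferredEditTracker();
    long txid = tracker.append(true);  // deferred-flush edit, not yet synced
    System.out.println(tracker.canRideOverCloseFailure(1, 2)); // false: would lose edits
    tracker.sync(txid);                // the edit is now durable
    System.out.println(tracker.canRideOverCloseFailure(1, 2)); // true
  }
}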