diff --git a/CHANGES.txt b/CHANGES.txt
index 13f460787b9..7bfc697a979 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -817,6 +817,8 @@ Release 0.21.0 - Unreleased
    HBASE-2879  Offer ZK CLI outside of HBase Shell
                (Nicolas Spiegelberg via Stack)
    HBASE-2886  Add search box to site (Alex Baranau via Stack)
+   HBASE-2792  Create a better way to chain log cleaners
+               (Chongxin Li via Stack)
 
 NEW FEATURES
    HBASE-1961  HBase EC2 scripts
diff --git a/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java b/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java
index 3ca36117f24..1c338310104 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/LogCleanerDelegate.java
@@ -20,16 +20,22 @@
 package org.apache.hadoop.hbase.master;
 
 import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 
 /**
- * Interface for the log cleaning function inside the master. Only 1 is called
- * so if the desired effect is the mix of many cleaners, do call them yourself
- * in order to control the flow.
- * HBase ships with OldLogsCleaner as the default implementation.
+ * Interface for the log cleaning function inside the master. By default, three
+ * cleaners (TimeToLiveLogCleaner, ReplicationLogCleaner and SnapshotLogCleaner)
+ * are called in order. If other effects are needed, implement your own
+ * LogCleanerDelegate and add it to the configuration
+ * "hbase.master.logcleaner.plugins", which is a comma-separated list of fully
+ * qualified class names. LogsCleaner will add it to the chain.
+ *
+ * HBase ships with LogsCleaner as the default implementation.
+ *
  * This interface extends Configurable, so setConf needs to be called once
  * before using the cleaner.
+ * Since LogCleanerDelegates are created in LogsCleaner by reflection, classes
+ * that implement this interface must provide a default constructor.
  */
 public interface LogCleanerDelegate extends Configurable {
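For anyone implementing against this interface, a minimal delegate can look like the sketch below. It is illustrative only: the package, class name and size policy are hypothetical and not part of this patch; the sketch assumes nothing beyond the interface and the reflection/default-constructor contract described in the javadoc above.

    package org.example;                               // hypothetical package

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.LogCleanerDelegate;

    public class SizeThresholdLogCleaner implements LogCleanerDelegate {
      private Configuration conf;

      // LogsCleaner instantiates delegates by reflection, so the
      // no-argument constructor must stay available.
      public SizeThresholdLogCleaner() {
      }

      @Override
      public boolean isLogDeletable(Path filePath) {
        try {
          long len = filePath.getFileSystem(conf)
              .getFileStatus(filePath).getLen();
          return len < 1024 * 1024;  // hypothetical policy: delete small logs only
        } catch (IOException e) {
          return false;              // when in doubt, keep the log
        }
      }

      @Override
      public void setConf(Configuration conf) {
        this.conf = conf;
      }

      @Override
      public Configuration getConf() {
        return conf;
      }
    }

To activate such a cleaner, append its fully qualified class name to hbase.master.logcleaner.plugins (see the hbase-default.xml change below).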
diff --git a/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java b/src/main/java/org/apache/hadoop/hbase/master/LogsCleaner.java
similarity index 56%
rename from src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
rename to src/main/java/org/apache/hadoop/hbase/master/LogsCleaner.java
index 37b2c3cb6a1..9d1a8b8f57b 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/LogsCleaner.java
@@ -31,24 +31,25 @@
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 
 import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.regex.Pattern;
 
 /**
  * This Chore, everytime it runs, will clear the logs in the old logs folder
- * that are older than hbase.master.logcleaner.ttl and, in order to limit the
+ * that every log cleaner in the chain considers deletable and, to limit the
  * number of deletes it sends, will only delete maximum 20 in a single run.
  */
-public class OldLogsCleaner extends Chore {
+public class LogsCleaner extends Chore {
 
-  static final Log LOG = LogFactory.getLog(OldLogsCleaner.class.getName());
+  static final Log LOG = LogFactory.getLog(LogsCleaner.class.getName());
 
   // Max number we can delete on every chore, this is to make sure we don't
   // issue thousands of delete commands around the same time
   private final int maxDeletedLogs;
   private final FileSystem fs;
   private final Path oldLogDir;
-  private final LogCleanerDelegate logCleaner;
+  private List<LogCleanerDelegate> logCleanersChain;
   private final Configuration conf;
 
   /**
@@ -59,36 +60,65 @@ public class OldLogsCleaner extends Chore {
    * @param fs handle to the FS
    * @param oldLogDir the path to the archived logs
    */
-  public OldLogsCleaner(final int p, final AtomicBoolean s,
+  public LogsCleaner(final int p, final AtomicBoolean s,
     Configuration conf, FileSystem fs, Path oldLogDir) {
-    super("OldLogsCleaner", p, s);
-    // Use the log cleaner provided by replication if enabled, unless something
-    // was already provided
-    if (conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY, false) &&
-        conf.get("hbase.master.logcleanerplugin.impl") == null) {
-      conf.set("hbase.master.logcleanerplugin.impl",
-          "org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner");
-    }
+    super("LogsCleaner", p, s);
+
     this.maxDeletedLogs = conf.getInt("hbase.master.logcleaner.maxdeletedlogs", 20);
     this.fs = fs;
     this.oldLogDir = oldLogDir;
     this.conf = conf;
-    this.logCleaner = getLogCleaner();
+    this.logCleanersChain = new LinkedList<LogCleanerDelegate>();
+
+    initLogCleanersChain();
   }
 
-  private LogCleanerDelegate getLogCleaner() {
+  /*
+   * Initialize the chain of log cleaners from the configuration. The default
+   * three LogCleanerDelegates in this chain are TimeToLiveLogCleaner,
+   * ReplicationLogCleaner and SnapshotLogCleaner.
+   */
+  private void initLogCleanersChain() {
+    String[] logCleaners = conf.getStrings("hbase.master.logcleaner.plugins");
+    if (logCleaners != null) {
+      for (String className : logCleaners) {
+        LogCleanerDelegate logCleaner = newLogCleaner(className, conf);
+        addLogCleaner(logCleaner);
+      }
+    }
+  }
+
+  /**
+   * A utility method to create a new instance of a LogCleanerDelegate from
+   * its class name.
+   * @param className fully qualified class name of the LogCleanerDelegate
+   * @param conf the configuration to pass to the new instance
+   * @return the new instance, or null if it could not be created
+   */
+  public static LogCleanerDelegate newLogCleaner(String className, Configuration conf) {
     try {
-      Class c = Class.forName(conf.get("hbase.master.logcleanerplugin.impl",
-        TimeToLiveLogCleaner.class.getCanonicalName()));
+      Class c = Class.forName(className);
       LogCleanerDelegate cleaner = (LogCleanerDelegate) c.newInstance();
       cleaner.setConf(conf);
       return cleaner;
-    } catch (Exception e) {
-      LOG.warn("Passed log cleaner implementation throws errors, " +
-          "defaulting to TimeToLiveLogCleaner", e);
-      return new TimeToLiveLogCleaner();
+    } catch (Exception e) {
+      LOG.warn("Cannot create LogCleanerDelegate: " + className, e);
+      // skip this cleaner if it cannot be instantiated
+      return null;
+    }
+  }
+
+  /**
+   * Add a LogCleanerDelegate to the log cleaner chain. A log file is deletable
+   * only if every LogCleanerDelegate in the chain considers it deletable.
+   * @param logCleaner the cleaner to append to the chain
+   */
+  public void addLogCleaner(LogCleanerDelegate logCleaner) {
+    if (logCleaner != null && !logCleanersChain.contains(logCleaner)) {
+      logCleanersChain.add(logCleaner);
+      LOG.debug("Added log cleaner to chain: " + logCleaner.getClass().getName());
     }
   }
 
@@ -97,13 +127,18 @@ public class OldLogsCleaner extends Chore {
     try {
       FileStatus[] files = this.fs.listStatus(this.oldLogDir);
       int nbDeletedLog = 0;
-      for (FileStatus file : files) {
+      FILE: for (FileStatus file : files) {
         Path filePath = file.getPath();
         if (HLog.validateHLogFilename(filePath.getName())) {
-          if (logCleaner.isLogDeletable(filePath) ) {
-            this.fs.delete(filePath, true);
-            nbDeletedLog++;
+          for (LogCleanerDelegate logCleaner : logCleanersChain) {
+            if (!logCleaner.isLogDeletable(filePath)) {
+              // this log is not deletable, move on to the next log file
+              continue FILE;
+            }
           }
+          // delete this log file only if it passes every log cleaner
+          this.fs.delete(filePath, true);
+          nbDeletedLog++;
         } else {
           LOG.warn("Found a wrongly formated file: "
               + file.getPath().getName());
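The labeled-continue loop in chore() is a plain conjunction with short-circuiting. Restated as a standalone sketch (the helper class is illustrative, not part of the patch; the delegate type is the real one):

    import java.util.List;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.master.LogCleanerDelegate;

    final class ChainSemantics {
      // A file may be deleted only if every delegate agrees; the first veto
      // wins, and later (possibly more expensive) cleaners never run.
      static boolean isDeletable(Path path, List<LogCleanerDelegate> chain) {
        for (LogCleanerDelegate cleaner : chain) {
          if (!cleaner.isLogDeletable(path)) {
            return false;
          }
        }
        return true;
      }
    }

This is also why the ordering advice in the hbase-default.xml change below matters: cheap, high-yield vetoes belong at the front of the chain.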
diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 9fb1ccebf5e..558ff10424c 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -96,7 +96,7 @@ public class ServerManager {
 
   private int minimumServerCount;
 
-  private final OldLogsCleaner oldLogCleaner;
+  private final LogsCleaner logCleaner;
 
   /*
    * Dumps into log current stats on dead servers and number of servers
@@ -150,11 +150,11 @@
     String n = Thread.currentThread().getName();
     Threads.setDaemonThreadRunning(this.serverMonitorThread,
       n + ".serverMonitor");
-    this.oldLogCleaner = new OldLogsCleaner(
+    this.logCleaner = new LogsCleaner(
       c.getInt("hbase.master.meta.thread.rescanfrequency",60 * 1000),
         this.master.getShutdownRequested(), c,
         master.getFileSystem(), master.getOldLogDir());
-    Threads.setDaemonThreadRunning(oldLogCleaner,
+    Threads.setDaemonThreadRunning(logCleaner,
       n + ".oldLogCleaner");
   }
diff --git a/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java b/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
index eb859aaf121..4d4b00a4da4 100644
--- a/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
+++ b/src/main/java/org/apache/hadoop/hbase/replication/master/ReplicationLogCleaner.java
@@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.LogCleanerDelegate;
-import org.apache.hadoop.hbase.master.TimeToLiveLogCleaner;
 import org.apache.hadoop.hbase.replication.ReplicationZookeeperWrapper;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
 import org.apache.zookeeper.WatchedEvent;
@@ -44,8 +43,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 public class ReplicationLogCleaner implements LogCleanerDelegate, Watcher {
 
   private static final Log LOG =
-      LogFactory.getLog(ReplicationLogCleaner.class);
-  private TimeToLiveLogCleaner ttlCleaner;
+      LogFactory.getLog(ReplicationLogCleaner.class);
   private Configuration conf;
   private ReplicationZookeeperWrapper zkHelper;
   private Set<String> hlogs = new HashSet<String>();
@@ -57,12 +55,6 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Watcher {
 
   @Override
   public boolean isLogDeletable(Path filePath) {
-
-    // Don't bother going further if the hlog isn't even expired
-    if (!ttlCleaner.isLogDeletable(filePath)) {
-      LOG.debug("Won't delete log since not past due " + filePath);
-      return false;
-    }
     String log = filePath.getName();
     // If we saw the hlog previously, let's consider it's still used
     // At some point in the future we will refresh the list and it will be gone
@@ -72,7 +64,7 @@
 
     // Let's see it's still there
     // This solution makes every miss very expensive to process since we
-    // almost completly refresh the cache each time
+    // almost completely refresh the cache each time
     return !refreshHLogsAndSearch(log);
   }
 
@@ -117,8 +109,6 @@ public class ReplicationLogCleaner implements LogCleanerDelegate, Watcher {
   @Override
   public void setConf(Configuration conf) {
     this.conf = conf;
-    this.ttlCleaner = new TimeToLiveLogCleaner();
-    this.ttlCleaner.setConf(conf);
     try {
       this.zkHelper = new ReplicationZookeeperWrapper(
           ZooKeeperWrapper.createInstance(this.conf,
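Dropping the embedded ttlCleaner is safe because the same guarantee is now expressed through plugin order: with TimeToLiveLogCleaner listed before ReplicationLogCleaner, young logs are vetoed before the ZooKeeper-backed replication check ever runs. A programmatic sketch of that ordering (equivalent to the XML default below; the wrapper class is illustrative, not part of the patch):

    import org.apache.hadoop.conf.Configuration;

    final class DefaultChainOrder {
      static void configure(Configuration conf) {
        conf.setStrings("hbase.master.logcleaner.plugins",
            "org.apache.hadoop.hbase.master.TimeToLiveLogCleaner",   // cheap age check first
            "org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner"); // ZK lookup second
      }
    }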
diff --git a/src/main/resources/hbase-default.xml b/src/main/resources/hbase-default.xml
index e3a966927a3..b73f0ff5768 100644
--- a/src/main/resources/hbase-default.xml
+++ b/src/main/resources/hbase-default.xml
@@ -289,6 +289,17 @@
     after which it will be cleaned by a master thread.
     </description>
   </property>
+  <property>
+    <name>hbase.master.logcleaner.plugins</name>
+    <value>org.apache.hadoop.hbase.master.TimeToLiveLogCleaner,org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner</value>
+    <description>A comma-separated list of LogCleanerDelegates that are used
+    by LogsCleaner. These log cleaners are called in order, so put the log
+    cleaner that prunes the most log files in front. To implement your own
+    LogCleanerDelegate, just put it in HBase's classpath and add the fully
+    qualified class name here. Unless you have a special reason, you should
+    always keep the default log cleaners above in the list.
+    </description>
+  </property>
   <property>
     <name>hbase.regions.percheckin</name>
     <value>10</value>
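Outside the master, a LogsCleaner can also be driven by hand, much as the test below does. A minimal sketch, with stated assumptions: the period, path and stop flag are illustrative, only the TTL cleaner is configured so no ZooKeeper is needed, and the class sits in the same package as the test so that chore() is callable:

    package org.apache.hadoop.hbase.master;  // same package as the test

    import java.util.concurrent.atomic.AtomicBoolean;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class LogsCleanerSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // TTL cleaner only, so this sketch has no ZooKeeper dependency
        conf.set("hbase.master.logcleaner.plugins",
            "org.apache.hadoop.hbase.master.TimeToLiveLogCleaner");
        FileSystem fs = FileSystem.get(conf);
        Path oldLogDir = new Path("/tmp/oldlogs");     // illustrative path
        AtomicBoolean stop = new AtomicBoolean(false);
        LogsCleaner cleaner = new LogsCleaner(60 * 1000, stop, conf, fs, oldLogDir);
        cleaner.chore();  // one cleaning pass; in the master this runs as a daemon Chore
      }
    }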
diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java b/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
similarity index 60%
rename from src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java
rename to src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
index a92e0da2631..8b7f60fa779 100644
--- a/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java
+++ b/src/test/java/org/apache/hadoop/hbase/master/TestLogsCleaner.java
@@ -26,11 +26,13 @@ import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.replication.ReplicationZookeeperWrapper;
+import org.apache.hadoop.hbase.zookeeper.ZooKeeperWrapper;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.conf.Configuration;
@@ -38,16 +40,18 @@ import org.apache.hadoop.conf.Configuration;
 import java.net.URLEncoder;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-public class TestOldLogsCleaner {
+public class TestLogsCleaner {
 
   private final static HBaseTestingUtility TEST_UTIL =
       new HBaseTestingUtility();
+  private ReplicationZookeeperWrapper zkHelper;
 
   /**
    * @throws java.lang.Exception
    */
   @BeforeClass
   public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniZKCluster();
   }
 
   /**
@@ -55,6 +59,7 @@
    */
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniZKCluster();
   }
 
   /**
@@ -62,6 +67,10 @@
    */
   @Before
   public void setUp() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    zkHelper = new ReplicationZookeeperWrapper(
+        ZooKeeperWrapper.createInstance(conf, HRegionServer.class.getName()),
+        conf, new AtomicBoolean(true), "test-cluster");
   }
 
   /**
@@ -80,23 +89,40 @@
     FileSystem fs = FileSystem.get(c);
     AtomicBoolean stop = new AtomicBoolean(false);
-    OldLogsCleaner cleaner = new OldLogsCleaner(1000, stop, c, fs, oldLogDir);
+    LogsCleaner cleaner = new LogsCleaner(1000, stop, c, fs, oldLogDir);
 
     // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files
     long now = System.currentTimeMillis();
     fs.delete(oldLogDir, true);
     fs.mkdirs(oldLogDir);
+    // Case 1: 2 invalid files, which would be deleted directly
     fs.createNewFile(new Path(oldLogDir, "a"));
     fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a"));
+    // Case 2: 1 "recent" file, not even deletable for the first log cleaner
+    // (TimeToLiveLogCleaner), so we do not go down the chain
     fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + now));
     System.out.println("Now is: " + now);
     for (int i = 0; i < 30; i++) {
-      fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now - 6000000 - i) ));
+      // Case 3: old files which would be deletable for the first log cleaner
+      // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner)
+      Path fileName = new Path(oldLogDir, fakeMachineName + "."
+          + (now - 6000000 - i) );
+      fs.createNewFile(fileName);
+      // Case 4: put 3 old log files in ZK, indicating that they are scheduled
+      // for replication, so these files would pass the first log cleaner
+      // (TimeToLiveLogCleaner) but would be rejected by the second
+      // (ReplicationLogCleaner)
+      if (i % (30/3) == 0) {
+        zkHelper.addLogToList(fileName.getName(), fakeMachineName);
+        System.out.println("Replication log file: " + fileName);
+      }
     }
     for (FileStatus stat : fs.listStatus(oldLogDir)) {
       System.out.println(stat.getPath().toString());
     }
 
+    // Case 2: 1 newer file, not even deletable for the first log cleaner
+    // (TimeToLiveLogCleaner), so we do not go down the chain
     fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) ));
 
     assertEquals(34, fs.listStatus(oldLogDir).length);
 
@@ -106,11 +132,17 @@
     cleaner.chore();
 
     assertEquals(14, fs.listStatus(oldLogDir).length);
 
-    // We will delete all remaining log files and those that are invalid
+    // We will delete all remaining log files which are not scheduled for
+    // replication and those that are invalid
     cleaner.chore();
 
-    // We end up with the current log file and a newer one
-    assertEquals(2, fs.listStatus(oldLogDir).length);
+    // We end up with the current log file, a newer one, and the 3 old log
+    // files which are scheduled for replication
+    assertEquals(5, fs.listStatus(oldLogDir).length);
+
+    for (FileStatus file : fs.listStatus(oldLogDir)) {
+      System.out.println("Kept log file: " + file.getPath().getName());
+    }
   }
 }
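For reference, the assertion counts work out as follows, assuming hbase.master.logcleaner.maxdeletedlogs is left at its default of 20:

    34 files = 2 invalid + 1 "recent" + 30 old + 1 newer
    after the 1st chore(): 34 - 20 (maxdeletedlogs cap) = 14
    after the 2nd chore(): 14 - 9 remaining deletable   = 5
    kept: the "recent" file, the newer file, and the 3 old logs
          queued for replication in ZooKeeper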