From 8465f20316344dbbd84865596edd5a5af6bf9930 Mon Sep 17 00:00:00 2001 From: mbertozzi Date: Thu, 4 Apr 2013 11:10:56 +0000 Subject: [PATCH] HBASE-7615 Add metrics for snapshots git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1464441 13f79535-47bb-0310-9956-ffa450edef68 --- .../hbase/master/MetricsMasterSource.java | 11 + .../hbase/master/MetricsMasterSourceImpl.java | 25 ++ .../hbase/master/MetricsMasterSourceImpl.java | 25 ++ .../apache/hadoop/hbase/master/HMaster.java | 2 +- .../hadoop/hbase/master/MetricsMaster.java | 24 ++ .../master/snapshot/CloneSnapshotHandler.java | 7 +- .../DisabledTableSnapshotHandler.java | 5 +- .../snapshot/EnabledTableSnapshotHandler.java | 5 +- .../snapshot/RestoreSnapshotHandler.java | 9 +- .../master/snapshot/SnapshotManager.java | 21 +- .../master/snapshot/TakeSnapshotHandler.java | 8 +- .../hadoop/hbase/snapshot/SnapshotInfo.java | 281 ++++++++++++++---- .../hbase-webapps/master/snapshot.jsp | 34 +++ .../master/snapshot/TestSnapshotManager.java | 4 +- 14 files changed, 390 insertions(+), 71 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java index 1350b0182fa..7ad6e4f2de6 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSource.java @@ -57,6 +57,9 @@ public interface MetricsMasterSource extends BaseSource { static final String IS_ACTIVE_MASTER_NAME = "isActiveMaster"; static final String SPLIT_TIME_NAME = "hlogSplitTime"; static final String SPLIT_SIZE_NAME = "hlogSplitSize"; + static final String SNAPSHOT_TIME_NAME = "snapshotTime"; + static final String SNAPSHOT_RESTORE_TIME_NAME = "snapshotRestoreTime"; + static final String SNAPSHOT_CLONE_TIME_NAME = "snapshotCloneTime"; static final String CLUSTER_REQUESTS_NAME = 
"clusterRequests"; static final String RIT_COUNT_NAME = "ritCount"; static final String RIT_COUNT_OVER_THRESHOLD_NAME = "ritCountOverThreshold"; @@ -72,6 +75,9 @@ public interface MetricsMasterSource extends BaseSource { static final String IS_ACTIVE_MASTER_DESC = "Is Active Master"; static final String SPLIT_TIME_DESC = "Time it takes to finish HLog.splitLog()"; static final String SPLIT_SIZE_DESC = "Size of HLog files being split"; + static final String SNAPSHOT_TIME_DESC = "Time it takes to finish snapshot()"; + static final String SNAPSHOT_RESTORE_TIME_DESC = "Time it takes to finish restoreSnapshot()"; + static final String SNAPSHOT_CLONE_TIME_DESC = "Time it takes to finish cloneSnapshot()"; /** @@ -106,4 +112,9 @@ public interface MetricsMasterSource extends BaseSource { void updateSplitSize(long size); + void updateSnapshotTime(long time); + + void updateSnapshotCloneTime(long time); + + void updateSnapshotRestoreTime(long time); } diff --git a/hbase-hadoop1-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java b/hbase-hadoop1-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java index ad078400eed..2d7c9fb6fd3 100644 --- a/hbase-hadoop1-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java +++ b/hbase-hadoop1-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java @@ -26,6 +26,7 @@ import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.lib.MetricMutableCounterLong; import org.apache.hadoop.metrics2.lib.MetricMutableGaugeLong; import org.apache.hadoop.metrics2.lib.MetricMutableHistogram; +import org.apache.hadoop.metrics2.lib.MetricMutableStat; /** * Hadoop1 implementation of MetricsMasterSource. 
@@ -44,6 +45,9 @@ public class MetricsMasterSourceImpl private MetricMutableGaugeLong ritOldestAgeGauge; private MetricMutableHistogram splitTimeHisto; private MetricMutableHistogram splitSizeHisto; + private MetricMutableStat snapshotTimeHisto; + private MetricMutableStat snapshotCloneTimeHisto; + private MetricMutableStat snapshotRestoreTimeHisto; public MetricsMasterSourceImpl(MetricsMasterWrapper masterWrapper) { this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT, masterWrapper); @@ -67,6 +71,12 @@ public class MetricsMasterSourceImpl ritOldestAgeGauge = metricsRegistry.newGauge(RIT_OLDEST_AGE_NAME, "", 0l); splitSizeHisto = metricsRegistry.newHistogram(SPLIT_SIZE_NAME, SPLIT_SIZE_DESC); splitTimeHisto = metricsRegistry.newHistogram(SPLIT_TIME_NAME, SPLIT_TIME_DESC); + snapshotTimeHisto = metricsRegistry.newStat( + SNAPSHOT_TIME_NAME, SNAPSHOT_TIME_DESC, "Ops", "Time", true); + snapshotCloneTimeHisto = metricsRegistry.newStat( + SNAPSHOT_CLONE_TIME_NAME, SNAPSHOT_CLONE_TIME_DESC, "Ops", "Time", true); + snapshotRestoreTimeHisto = metricsRegistry.newStat( + SNAPSHOT_RESTORE_TIME_NAME, SNAPSHOT_RESTORE_TIME_DESC, "Ops", "Time", true); } public void incRequests(final int inc) { @@ -95,6 +105,21 @@ public class MetricsMasterSourceImpl splitSizeHisto.add(size); } + @Override + public void updateSnapshotTime(long time) { + snapshotTimeHisto.add(time); + } + + @Override + public void updateSnapshotCloneTime(long time) { + snapshotCloneTimeHisto.add(time); + } + + @Override + public void updateSnapshotRestoreTime(long time) { + snapshotRestoreTimeHisto.add(time); + } + /** * Method to export all the metrics. 
* diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java index 5dc0bf854a8..6f9f1430361 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/master/MetricsMasterSourceImpl.java @@ -25,6 +25,7 @@ import org.apache.hadoop.metrics2.lib.Interns; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableHistogram; +import org.apache.hadoop.metrics2.lib.MutableStat; /** * Hadoop2 implementation of MetricsMasterSource. @@ -41,6 +42,9 @@ public class MetricsMasterSourceImpl private MutableGaugeLong ritOldestAgeGauge; private MutableHistogram splitTimeHisto; private MutableHistogram splitSizeHisto; + private MutableStat snapshotTimeHisto; + private MutableStat snapshotCloneTimeHisto; + private MutableStat snapshotRestoreTimeHisto; public MetricsMasterSourceImpl(MetricsMasterWrapper masterWrapper) { this(METRICS_NAME, @@ -69,6 +73,12 @@ public class MetricsMasterSourceImpl ritOldestAgeGauge = metricsRegistry.newGauge(RIT_OLDEST_AGE_NAME, "", 0l); splitSizeHisto = metricsRegistry.newHistogram(SPLIT_SIZE_NAME, SPLIT_SIZE_DESC); splitTimeHisto = metricsRegistry.newHistogram(SPLIT_TIME_NAME, SPLIT_TIME_DESC); + snapshotTimeHisto = metricsRegistry.newStat( + SNAPSHOT_TIME_NAME, SNAPSHOT_TIME_DESC, "Ops", "Time", true); + snapshotCloneTimeHisto = metricsRegistry.newStat( + SNAPSHOT_CLONE_TIME_NAME, SNAPSHOT_CLONE_TIME_DESC, "Ops", "Time", true); + snapshotRestoreTimeHisto = metricsRegistry.newStat( + SNAPSHOT_RESTORE_TIME_NAME, SNAPSHOT_RESTORE_TIME_DESC, "Ops", "Time", true); } public void incRequests(final int inc) { @@ -97,6 +107,21 @@ public class MetricsMasterSourceImpl splitSizeHisto.add(size); } + @Override + 
public void updateSnapshotTime(long time) { + snapshotTimeHisto.add(time); + } + + @Override + public void updateSnapshotCloneTime(long time) { + snapshotCloneTimeHisto.add(time); + } + + @Override + public void updateSnapshotRestoreTime(long time) { + snapshotRestoreTimeHisto.add(time); + } + @Override public void getMetrics(MetricsCollector metricsCollector, boolean all) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index ea71218f320..1a6dc50c336 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -618,7 +618,7 @@ Server { ", cluster-up flag was=" + wasUp); // create the snapshot manager - this.snapshotManager = new SnapshotManager(this); + this.snapshotManager = new SnapshotManager(this, this.metricsMaster); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java index 578bca4a242..2a42c44551c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetricsMaster.java @@ -89,4 +89,28 @@ public class MetricsMaster { public void updateRITOldestAge(long timestamp) { masterSource.setRITOldestAge(timestamp); } + + /** + * Record a single instance of a snapshot + * @param time time that the snapshot took + */ + public void addSnapshot(long time) { + masterSource.updateSnapshotTime(time); + } + + /** + * Record a single instance of a snapshot + * @param time time that the snapshot restore took + */ + public void addSnapshotRestore(long time) { + masterSource.updateSnapshotRestoreTime(time); + } + + /** + * Record a single instance of a snapshot cloned table + * @param time time that the snapshot clone took + */ + public void 
addSnapshotClone(long time) { + masterSource.updateSnapshotCloneTime(time); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java index 49201dc6c3d..36400f10842 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.exceptions.RestoreSnapshotException; import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.master.SnapshotSentinel; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -62,15 +63,18 @@ public class CloneSnapshotHandler extends CreateTableHandler implements Snapshot private final SnapshotDescription snapshot; private final ForeignExceptionDispatcher monitor; + private final MetricsMaster metricsMaster; private final MonitoredTask status; private volatile boolean stopped = false; public CloneSnapshotHandler(final MasterServices masterServices, - final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor, + final MetricsMaster metricsMaster) throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, masterServices.getConfiguration(), null, masterServices); + this.metricsMaster = metricsMaster; // Snapshot information this.snapshot = snapshot; @@ -141,6 +145,7 @@ public class CloneSnapshotHandler extends CreateTableHandler 
implements Snapshot } else { status.markComplete("Snapshot '"+ snapshot.getName() +"' clone completed and table enabled!"); } + metricsMaster.addSnapshotClone(status.getCompletionTimestamp() - status.getStartTime()); super.completed(exception); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java index f78dd7cd162..6b5dabd0eec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.errorhandling.ForeignException; import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; @@ -64,8 +65,8 @@ public class DisabledTableSnapshotHandler extends TakeSnapshotHandler { * @throws IOException on unexpected error */ public DisabledTableSnapshotHandler(SnapshotDescription snapshot, - final MasterServices masterServices) throws IOException { - super(snapshot, masterServices); + final MasterServices masterServices, final MetricsMaster metricsMaster) throws IOException { + super(snapshot, masterServices, metricsMaster); // setup the timer timeoutInjector = TakeSnapshotUtils.getMasterTimerAndBindToMonitor(snapshot, conf, monitor); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java index 
537fbfbfc14..3e45e6432aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.errorhandling.ForeignException; import org.apache.hadoop.hbase.exceptions.HBaseSnapshotException; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.procedure.Procedure; import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; @@ -49,8 +50,8 @@ public class EnabledTableSnapshotHandler extends TakeSnapshotHandler { private final ProcedureCoordinator coordinator; public EnabledTableSnapshotHandler(SnapshotDescription snapshot, MasterServices master, - SnapshotManager manager) throws IOException { - super(snapshot, master); + final SnapshotManager manager, final MetricsMaster metricsMaster) throws IOException { + super(snapshot, master, metricsMaster); this.coordinator = manager.getCoordinator(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java index 9b6752ff1a6..b9b482d9bbd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import 
org.apache.hadoop.hbase.master.SnapshotSentinel; import org.apache.hadoop.hbase.master.handler.TableEventHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -62,14 +63,16 @@ public class RestoreSnapshotHandler extends TableEventHandler implements Snapsho private final SnapshotDescription snapshot; private final ForeignExceptionDispatcher monitor; + private final MetricsMaster metricsMaster; private final MonitoredTask status; private volatile boolean stopped = false; public RestoreSnapshotHandler(final MasterServices masterServices, - final SnapshotDescription snapshot, final HTableDescriptor htd) - throws IOException { + final SnapshotDescription snapshot, final HTableDescriptor htd, + final MetricsMaster metricsMaster) throws IOException { super(EventType.C_M_RESTORE_SNAPSHOT, htd.getName(), masterServices, masterServices); + this.metricsMaster = metricsMaster; // Snapshot information this.snapshot = snapshot; @@ -146,6 +149,8 @@ public class RestoreSnapshotHandler extends TableEventHandler implements Snapsho } else { status.markComplete("Restore snapshot '"+ snapshot.getName() +"'!"); } + metricsMaster.addSnapshotRestore(status.getCompletionTimestamp() - status.getStartTime()); + super.completed(exception); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java index 2aeb5f968b8..6e6e92609a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import 
org.apache.hadoop.hbase.master.SnapshotSentinel; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; @@ -118,6 +119,7 @@ public class SnapshotManager implements Stoppable { private boolean stopped; private final long wakeFrequency; private final MasterServices master; // Needed by TableEventHandlers + private final MetricsMaster metricsMaster; private final ProcedureCoordinator coordinator; // Is snapshot feature enabled? @@ -139,9 +141,11 @@ public class SnapshotManager implements Stoppable { * Construct a snapshot manager. * @param master */ - public SnapshotManager(final MasterServices master) throws KeeperException, IOException, - UnsupportedOperationException { + public SnapshotManager(final MasterServices master, final MetricsMaster metricsMaster) + throws KeeperException, IOException, UnsupportedOperationException { this.master = master; + this.metricsMaster = metricsMaster; + checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); // get the configuration for the coordinator @@ -166,9 +170,12 @@ public class SnapshotManager implements Stoppable { * @param coordinator procedure coordinator instance. exposed for testing. * @param pool HBase ExecutorServcie instance, exposed for testing. 
*/ - public SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator, ExecutorService pool) + public SnapshotManager(final MasterServices master, final MetricsMaster metricsMaster, + ProcedureCoordinator coordinator, ExecutorService pool) throws IOException, UnsupportedOperationException { this.master = master; + this.metricsMaster = metricsMaster; + checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); this.wakeFrequency = master.getConfiguration().getInt(SNAPSHOT_WAKE_MILLIS_KEY, @@ -428,7 +435,7 @@ public class SnapshotManager implements Stoppable { throws HBaseSnapshotException { TakeSnapshotHandler handler; try { - handler = new EnabledTableSnapshotHandler(snapshot, master, this).prepare(); + handler = new EnabledTableSnapshotHandler(snapshot, master, this, metricsMaster).prepare(); this.executorService.submit(handler); this.handler = handler; } catch (Exception e) { @@ -537,7 +544,7 @@ public class SnapshotManager implements Stoppable { DisabledTableSnapshotHandler handler; try { - handler = new DisabledTableSnapshotHandler(snapshot, this.master).prepare(); + handler = new DisabledTableSnapshotHandler(snapshot, master, metricsMaster).prepare(); this.executorService.submit(handler); this.handler = handler; } catch (Exception e) { @@ -620,7 +627,7 @@ public class SnapshotManager implements Stoppable { try { CloneSnapshotHandler handler = - new CloneSnapshotHandler(master, snapshot, hTableDescriptor).prepare(); + new CloneSnapshotHandler(master, snapshot, hTableDescriptor, metricsMaster).prepare(); this.executorService.submit(handler); restoreHandlers.put(tableName, handler); } catch (Exception e) { @@ -711,7 +718,7 @@ public class SnapshotManager implements Stoppable { try { RestoreSnapshotHandler handler = - new RestoreSnapshotHandler(master, snapshot, hTableDescriptor); + new RestoreSnapshotHandler(master, snapshot, hTableDescriptor, metricsMaster); this.executorService.submit(handler); 
restoreHandlers.put(hTableDescriptor.getNameAsString(), handler); } catch (Exception e) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java index 8f38cc7bfd6..6d018e556b7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.exceptions.SnapshotCreationException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.master.SnapshotSentinel; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; @@ -70,6 +71,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh // none of these should ever be null protected final MasterServices master; + protected final MetricsMaster metricsMaster; protected final SnapshotDescription snapshot; protected final Configuration conf; protected final FileSystem fs; @@ -87,13 +89,14 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh * @param masterServices master services provider * @throws IOException on unexpected error */ - public TakeSnapshotHandler(SnapshotDescription snapshot, - final MasterServices masterServices) throws IOException { + public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices, + final MetricsMaster metricsMaster) throws IOException { super(masterServices, EventType.C_M_SNAPSHOT_TABLE); assert snapshot != null : "SnapshotDescription must not be nul1"; assert masterServices != null : "MasterServices must not be nul1"; this.master = 
masterServices; + this.metricsMaster = metricsMaster; this.snapshot = snapshot; this.conf = this.master.getConfiguration(); this.fs = this.master.getMasterFileSystem().getFileSystem(); @@ -173,6 +176,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh completeSnapshot(this.snapshotDir, this.workingDir, this.fs); status.markComplete("Snapshot " + snapshot.getName() + " of table " + snapshot.getTable() + " completed"); + metricsMaster.addSnapshot(status.getCompletionTimestamp() - status.getStartTime()); } catch (Exception e) { status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " + snapshot.getTable() + " because " + e.getMessage()); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java index d0256e2ccdc..7caad2b251d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java @@ -63,6 +63,187 @@ import org.apache.hadoop.hbase.util.FSTableDescriptors; public final class SnapshotInfo extends Configured implements Tool { private static final Log LOG = LogFactory.getLog(SnapshotInfo.class); + /** + * Statistics about the snapshot + *
<ol> + *
<li> How many store files and logs are in the archive + *
<li> How many store files and logs are shared with the table + *
<li> Total store files and logs size and shared amount + * </ol>
+ */ + public static class SnapshotStats { + /** Information about the file referenced by the snapshot */ + static class FileInfo { + private final boolean inArchive; + private final long size; + + FileInfo(final boolean inArchive, final long size) { + this.inArchive = inArchive; + this.size = size; + } + + /** @return true if the file is in the archive */ + public boolean inArchive() { + return this.inArchive; + } + + /** @return true if the file is missing */ + public boolean isMissing() { + return this.size < 0; + } + + /** @return the file size */ + public long getSize() { + return this.size; + } + } + + private int hfileArchiveCount = 0; + private int hfilesMissing = 0; + private int hfilesCount = 0; + private int logsMissing = 0; + private int logsCount = 0; + private long hfileArchiveSize = 0; + private long hfileSize = 0; + private long logSize = 0; + + private final SnapshotDescription snapshot; + private final Configuration conf; + private final FileSystem fs; + + SnapshotStats(final Configuration conf, final FileSystem fs, final SnapshotDescription snapshot) + { + this.snapshot = snapshot; + this.conf = conf; + this.fs = fs; + } + + /** @return the snapshot descriptor */ + public SnapshotDescription getSnapshotDescription() { + return this.snapshot; + } + + /** @return true if the snapshot is corrupted */ + public boolean isSnapshotCorrupted() { + return hfilesMissing > 0 || logsMissing > 0; + } + + /** @return the number of available store files */ + public int getStoreFilesCount() { + return hfilesCount + hfileArchiveCount; + } + + /** @return the number of available store files in the archive */ + public int getArchivedStoreFilesCount() { + return hfileArchiveCount; + } + + /** @return the number of available log files */ + public int getLogsCount() { + return logsCount; + } + + /** @return the number of missing store files */ + public int getMissingStoreFilesCount() { + return hfilesMissing; + } + + /** @return the number of missing log files */ + 
public int getMissingLogsCount() { + return logsMissing; + } + + /** @return the total size of the store files referenced by the snapshot */ + public long getStoreFilesSize() { + return hfileSize + hfileArchiveSize; + } + + /** @return the total size of the store files shared */ + public long getSharedStoreFilesSize() { + return hfileSize; + } + + /** @return the total size of the store files in the archive */ + public long getArchivedStoreFileSize() { + return hfileArchiveSize; + } + + /** @return the percentage of the shared store files */ + public float getSharedStoreFilePercentage() { + return ((float)hfileSize / (hfileSize + hfileArchiveSize)) * 100; + } + + /** @return the total log size */ + public long getLogsSize() { + return logSize; + } + + /** + * Add the specified store file to the stats + * @param region region encoded Name + * @param family family name + * @param hfile store file name + * @return the store file information + */ + FileInfo addStoreFile(final String region, final String family, final String hfile) + throws IOException { + String table = this.snapshot.getTable(); + Path path = new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); + HFileLink link = new HFileLink(conf, path); + boolean inArchive = false; + long size = -1; + try { + if ((inArchive = fs.exists(link.getArchivePath()))) { + size = fs.getFileStatus(link.getArchivePath()).getLen(); + hfileArchiveSize += size; + hfileArchiveCount++; + } else { + size = link.getFileStatus(fs).getLen(); + hfileSize += size; + hfilesCount++; + } + } catch (FileNotFoundException e) { + hfilesMissing++; + } + return new FileInfo(inArchive, size); + } + + /** + * Add the specified recovered.edits file to the stats + * @param region region encoded name + * @param logfile log file name + * @return the recovered.edits information + */ + FileInfo addRecoveredEdits(final String region, final String logfile) throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + Path snapshotDir 
= SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile); + long size = fs.getFileStatus(path).getLen(); + logSize += size; + logsCount++; + return new FileInfo(true, size); + } + + /** + * Add the specified log file to the stats + * @param server server name + * @param logfile log file name + * @return the log information + */ + FileInfo addLogFile(final String server, final String logfile) throws IOException { + HLogLink logLink = new HLogLink(conf, server, logfile); + long size = -1; + try { + size = logLink.getFileStatus(fs).getLen(); + logSize += size; + logsCount++; + } catch (FileNotFoundException e) { + logsMissing++; + } + return new FileInfo(false, size); + } + } + private FileSystem fs; private Path rootDir; @@ -170,104 +351,68 @@ public final class SnapshotInfo extends Configured implements Tool { * dump the file list if requested and the collected information. */ private void printFiles(final boolean showFiles) throws IOException { - final String table = snapshotDesc.getTable(); - final Configuration conf = getConf(); - if (showFiles) { System.out.println("Snapshot Files"); System.out.println("----------------------------------------"); } // Collect information about hfiles and logs in the snapshot - final AtomicInteger hfileArchiveCount = new AtomicInteger(); - final AtomicInteger hfilesMissing = new AtomicInteger(); - final AtomicInteger hfilesCount = new AtomicInteger(); - final AtomicInteger logsMissing = new AtomicInteger(); - final AtomicInteger logsCount = new AtomicInteger(); - final AtomicLong hfileArchiveSize = new AtomicLong(); - final AtomicLong hfileSize = new AtomicLong(); - final AtomicLong logSize = new AtomicLong(); + final String table = this.snapshotDesc.getTable(); + final SnapshotStats stats = new SnapshotStats(this.getConf(), this.fs, this.snapshotDesc); SnapshotReferenceUtil.visitReferencedFiles(fs, snapshotDir, new 
SnapshotReferenceUtil.FileVisitor() { public void storeFile (final String region, final String family, final String hfile) throws IOException { - Path path = new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); - HFileLink link = new HFileLink(conf, path); - boolean inArchive = false; - long size = -1; - try { - if ((inArchive = fs.exists(link.getArchivePath()))) { - size = fs.getFileStatus(link.getArchivePath()).getLen(); - hfileArchiveSize.addAndGet(size); - hfileArchiveCount.addAndGet(1); - } else { - size = link.getFileStatus(fs).getLen(); - hfileSize.addAndGet(size); - hfilesCount.addAndGet(1); - } - } catch (FileNotFoundException e) { - hfilesMissing.addAndGet(1); - } + SnapshotStats.FileInfo info = stats.addStoreFile(region, family, hfile); if (showFiles) { System.out.printf("%8s %s/%s/%s/%s %s%n", - (size < 0 ? "-" : StringUtils.humanReadableInt(size)), + (info.isMissing() ? "-" : StringUtils.humanReadableInt(info.getSize())), table, region, family, hfile, - (inArchive ? "(archive)" : (size < 0) ? "(NOT FOUND)" : "")); + (info.inArchive() ? "(archive)" : info.isMissing() ? 
"(NOT FOUND)" : "")); } } public void recoveredEdits (final String region, final String logfile) throws IOException { - Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile); - long size = fs.getFileStatus(path).getLen(); - logSize.addAndGet(size); - logsCount.addAndGet(1); + SnapshotStats.FileInfo info = stats.addRecoveredEdits(region, logfile); if (showFiles) { System.out.printf("%8s recovered.edits %s on region %s%n", - StringUtils.humanReadableInt(size), logfile, region); + StringUtils.humanReadableInt(info.getSize()), logfile, region); } } public void logFile (final String server, final String logfile) throws IOException { - HLogLink logLink = new HLogLink(conf, server, logfile); - long size = -1; - try { - size = logLink.getFileStatus(fs).getLen(); - logSize.addAndGet(size); - logsCount.addAndGet(1); - } catch (FileNotFoundException e) { - logsMissing.addAndGet(1); - } + SnapshotStats.FileInfo info = stats.addLogFile(server, logfile); if (showFiles) { System.out.printf("%8s log %s on server %s %s%n", - (size < 0 ? "-" : StringUtils.humanReadableInt(size)), + (info.isMissing() ? "-" : StringUtils.humanReadableInt(info.getSize())), logfile, server, - (size < 0 ? "(NOT FOUND)" : "")); + (info.isMissing() ? 
"(NOT FOUND)" : "")); } } }); // Dump the stats System.out.println(); - if (hfilesMissing.get() > 0 || logsMissing.get() > 0) { + if (stats.isSnapshotCorrupted()) { System.out.println("**************************************************************"); System.out.printf("BAD SNAPSHOT: %d hfile(s) and %d log(s) missing.%n", - hfilesMissing.get(), logsMissing.get()); + stats.getMissingStoreFilesCount(), stats.getMissingLogsCount()); System.out.println("**************************************************************"); } System.out.printf("%d HFiles (%d in archive), total size %s (%.2f%% %s shared with the source table)%n", - hfilesCount.get() + hfileArchiveCount.get(), hfileArchiveCount.get(), - StringUtils.humanReadableInt(hfileSize.get() + hfileArchiveSize.get()), - ((float)hfileSize.get() / (hfileSize.get() + hfileArchiveSize.get())) * 100, - StringUtils.humanReadableInt(hfileSize.get()) + stats.getStoreFilesCount(), stats.getArchivedStoreFilesCount(), + StringUtils.humanReadableInt(stats.getStoreFilesSize()), + stats.getSharedStoreFilePercentage(), + StringUtils.humanReadableInt(stats.getSharedStoreFilesSize()) ); System.out.printf("%d Logs, total size %s%n", - logsCount.get(), StringUtils.humanReadableInt(logSize.get())); + stats.getLogsCount(), StringUtils.humanReadableInt(stats.getLogsSize())); System.out.println(); } @@ -286,6 +431,36 @@ public final class SnapshotInfo extends Configured implements Tool { System.exit(1); } + /** + * Returns the snapshot stats + * @param conf the {@link Configuration} to use + * @param snapshot {@link SnapshotDescription} to get stats from + * @return the snapshot stats + */ + public static SnapshotStats getSnapshotStats(final Configuration conf, + final SnapshotDescription snapshot) throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = FileSystem.get(conf); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + final SnapshotStats stats = new SnapshotStats(conf, 
fs, snapshot); + SnapshotReferenceUtil.visitReferencedFiles(fs, snapshotDir, + new SnapshotReferenceUtil.FileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + stats.addStoreFile(region, family, hfile); + } + + public void recoveredEdits (final String region, final String logfile) throws IOException { + stats.addRecoveredEdits(region, logfile); + } + + public void logFile (final String server, final String logfile) throws IOException { + stats.addLogFile(server, logfile); + } + }); + return stats; + } + /** * The guts of the {@link #main} method. * Call this method to avoid the {@link #main(String[])} System.exit. diff --git a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp index ae1904cd840..61d662cfb09 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp @@ -28,10 +28,12 @@ import="org.apache.hadoop.hbase.ServerLoad" import="org.apache.hadoop.hbase.RegionLoad" import="org.apache.hadoop.hbase.master.HMaster" + import="org.apache.hadoop.hbase.snapshot.SnapshotInfo" import="org.apache.hadoop.hbase.util.Bytes" import="org.apache.hadoop.hbase.util.FSUtils" import="org.apache.hadoop.hbase.protobuf.ProtobufUtil" import="org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription" + import="org.apache.hadoop.util.StringUtils" import="java.util.List" import="java.util.Map" import="org.apache.hadoop.hbase.HConstants"%><% @@ -41,9 +43,11 @@ boolean readOnly = conf.getBoolean("hbase.master.ui.readonly", false); String snapshotName = request.getParameter("name"); SnapshotDescription snapshot = null; + SnapshotInfo.SnapshotStats stats = null; for (SnapshotDescription snapshotDesc: hbadmin.listSnapshots()) { if (snapshotName.equals(snapshotDesc.getName())) { snapshot = snapshotDesc; + stats = 
SnapshotInfo.getSnapshotStats(conf, snapshot); break; } } @@ -158,14 +162,44 @@ Creation Time Type Format Version + State <%= snapshot.getTable() %> <%= new Date(snapshot.getCreationTime()) %> <%= snapshot.getType() %> <%= snapshot.getVersion() %> + <% if (stats.isSnapshotCorrupted()) { %> + CORRUPTED + <% } else { %> + ok + <% } %> +
+
+ <%= stats.getStoreFilesCount() %> HFiles (<%= stats.getArchivedStoreFilesCount() %> in archive), + total size <%= StringUtils.humanReadableInt(stats.getStoreFilesSize()) %> + <%-- two decimals, matching the "%.2f%%" used by SnapshotInfo.printFiles; show 0 when there are no store files, since the percentage is 0/0 (NaN) in that case --%>(<%= String.format("%.2f", stats.getStoreFilesSize() > 0 ? stats.getSharedStoreFilePercentage() : 0f) %>% + <%= StringUtils.humanReadableInt(stats.getSharedStoreFilesSize()) %> shared with the source + table)
+
+ <%= stats.getLogsCount() %> Logs, total size + <%= StringUtils.humanReadableInt(stats.getLogsSize()) %> +
+
+ <% if (stats.isSnapshotCorrupted()) { %> +
+
+

CORRUPTED Snapshot

+
+
+ <%= stats.getMissingStoreFilesCount() %> hfile(s) and + <%= stats.getMissingLogsCount() %> log(s) missing. +
+
+ <% } %> <% } // end else diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java index c84b6c03f51..0065af17ac4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.MetricsMaster; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; @@ -49,6 +50,7 @@ public class TestSnapshotManager { private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); MasterServices services = Mockito.mock(MasterServices.class); + MetricsMaster metrics = Mockito.mock(MetricsMaster.class); ProcedureCoordinator coordinator = Mockito.mock(ProcedureCoordinator.class); ExecutorService pool = Mockito.mock(ExecutorService.class); MasterFileSystem mfs = Mockito.mock(MasterFileSystem.class); @@ -71,7 +73,7 @@ public class TestSnapshotManager { Mockito.when(services.getMasterFileSystem()).thenReturn(mfs); Mockito.when(mfs.getFileSystem()).thenReturn(fs); Mockito.when(mfs.getRootDir()).thenReturn(UTIL.getDataTestDir()); - return new SnapshotManager(services, coordinator, pool); + return new SnapshotManager(services, metrics, coordinator, pool); } @Test