From 3a72ccefbda5ab0bdb7bc171a115504127a2d1f6 Mon Sep 17 00:00:00 2001 From: Viraj Jasani <34790606+virajjasani@users.noreply.github.com> Date: Tue, 23 Jul 2019 03:33:44 +0530 Subject: [PATCH] HBASE-22648 Snapshot TTL (#371) Signed-off-by: Reid Chan Signed-off-by: Andrew Purtell Conflicts: hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java --- .../org/apache/hadoop/hbase/client/Admin.java | 60 +++++- .../hbase/client/SnapshotDescription.java | 72 +++++-- .../hbase/shaded/protobuf/ProtobufUtil.java | 16 +- .../ClientSnapshotDescriptionUtils.java | 13 +- .../org/apache/hadoop/hbase/HConstants.java | 9 + .../src/main/resources/hbase-default.xml | 19 ++ .../src/main/protobuf/Snapshot.proto | 1 + hbase-protocol/src/main/protobuf/HBase.proto | 1 + .../apache/hadoop/hbase/master/HMaster.java | 13 ++ .../master/cleaner/SnapshotCleanerChore.java | 110 ++++++++++ .../snapshot/SnapshotDescriptionUtils.java | 19 ++ .../hadoop/hbase/snapshot/SnapshotInfo.java | 11 +- .../hbase-webapps/master/snapshot.jsp | 10 + .../hbase-webapps/master/snapshotsStats.jsp | 8 + .../hbase/client/TestSnapshotFromClient.java | 2 +- .../TestSnapshotTemporaryDirectory.java | 5 +- .../cleaner/TestSnapshotCleanerChore.java | 199 ++++++++++++++++++ hbase-shell/src/main/ruby/hbase/admin.rb | 8 +- .../asciidoc/_chapters/hbase-default.adoc | 30 +++ src/main/asciidoc/_chapters/ops_mgt.adoc | 41 ++++ 20 files changed, 608 insertions(+), 39 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java index e6ccc771147..35e1a95df0a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java @@ -2069,13 +2069,59 @@ public interface Admin extends Abortable, Closeable { } /** - * Take a snapshot and wait for the server to complete that snapshot (blocking). Only a single - * snapshot should be taken at a time for an instance of HBase, or results may be undefined (you - * can tell multiple HBase clusters to snapshot at the same time, but only one at a time for a - * single cluster). Snapshots are considered unique based on the name of the snapshot. - * Attempts to take a snapshot with the same name (even a different type or with different - * parameters) will fail with a {@link SnapshotCreationException} indicating the duplicate naming. - * Snapshot names follow the same naming constraints as tables in HBase. See + * Create typed snapshot of the table. Snapshots are considered unique based on the name of the + * snapshot. Snapshots are taken sequentially even when requested concurrently, across + * all tables. Attempts to take a snapshot with the same name (even a different type or with + * different parameters) will fail with a {@link SnapshotCreationException} indicating the + * duplicate naming. Snapshot names follow the same naming constraints as tables in HBase. See + * {@link org.apache.hadoop.hbase.TableName#isLegalFullyQualifiedTableName(byte[])}. + * Snapshot can live with ttl seconds. + * + * @param snapshotName name to give the snapshot on the filesystem. Must be unique from all other + * snapshots stored on the cluster + * @param tableName name of the table to snapshot + * @param type type of snapshot to take + * @param snapshotProps snapshot additional properties e.g. TTL + * @throws IOException we fail to reach the master + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + default void snapshot(String snapshotName, TableName tableName, SnapshotType type, + Map snapshotProps) throws IOException, + SnapshotCreationException, IllegalArgumentException { + snapshot(new SnapshotDescription(snapshotName, tableName, type, snapshotProps)); + } + + /** + * Create typed snapshot of the table. Snapshots are considered unique based on the name of the + * snapshot. Snapshots are taken sequentially even when requested concurrently, across + * all tables. Attempts to take a snapshot with the same name (even a different type or with + * different parameters) will fail with a {@link SnapshotCreationException} indicating the + * duplicate naming. Snapshot names follow the same naming constraints as tables in HBase. See + * {@link org.apache.hadoop.hbase.TableName#isLegalFullyQualifiedTableName(byte[])}. + * Snapshot can live with ttl seconds. + * + * @param snapshotName name to give the snapshot on the filesystem. Must be unique from all other + * snapshots stored on the cluster + * @param tableName name of the table to snapshot + * @param snapshotProps snapshot additional properties e.g. TTL + * @throws IOException we fail to reach the master + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + default void snapshot(String snapshotName, TableName tableName, + Map snapshotProps) throws IOException, + SnapshotCreationException, IllegalArgumentException { + snapshot(new SnapshotDescription(snapshotName, tableName, SnapshotType.FLUSH, snapshotProps)); + } + + /** + * Take a snapshot and wait for the server to complete that snapshot (blocking). Snapshots are + * considered unique based on the name of the snapshot. Snapshots are taken sequentially + * even when requested concurrently, across all tables. Attempts to take a snapshot with the same + * name (even a different type or with different parameters) will fail with a + * {@link SnapshotCreationException} indicating the duplicate naming. Snapshot names follow the + * same naming constraints as tables in HBase. See * {@link org.apache.hadoop.hbase.TableName#isLegalFullyQualifiedTableName(byte[])}. You should * probably use {@link #snapshot(String, org.apache.hadoop.hbase.TableName)} or * {@link #snapshot(byte[], org.apache.hadoop.hbase.TableName)} unless you are sure about the type diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java index 0b6f196d615..4fa825e5f45 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/SnapshotDescription.java @@ -17,9 +17,13 @@ */ package org.apache.hadoop.hbase.client; +import java.util.Map; + import org.apache.hadoop.hbase.TableName; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.org.apache.commons.collections4.MapUtils; + /** * The POJO equivalent of HBaseProtos.SnapshotDescription */ @@ -30,6 +34,7 @@ public class SnapshotDescription { private final SnapshotType snapShotType; private final String owner; private final long creationTime; + private final long ttl; private final int version; public SnapshotDescription(String name) { @@ -48,7 +53,7 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table) { - this(name, table, SnapshotType.DISABLED, null); + this(name, table, SnapshotType.DISABLED, null, -1, -1, null); } /** @@ -63,14 +68,14 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table, SnapshotType type) { - this(name, table, type, null); + this(name, table, type, null, -1, -1, null); } /** - * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName - * instance instead. * @see #SnapshotDescription(String, TableName, SnapshotType, String) * @see HBASE-16892 + * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName + * instance instead. */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner) { @@ -78,31 +83,60 @@ public class SnapshotDescription { } public SnapshotDescription(String name, TableName table, SnapshotType type, String owner) { - this(name, table, type, owner, -1, -1); + this(name, table, type, owner, -1, -1, null); } /** + * @see #SnapshotDescription(String, TableName, SnapshotType, String, long, int, Map) + * @see HBASE-16892 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use the version with the TableName * instance instead. - * @see #SnapshotDescription(String, TableName, SnapshotType, String, long, int) - * @see HBASE-16892 */ @Deprecated public SnapshotDescription(String name, String table, SnapshotType type, String owner, long creationTime, int version) { - this(name, TableName.valueOf(table), type, owner, creationTime, version); + this(name, TableName.valueOf(table), type, owner, creationTime, version, null); } + /** + * SnapshotDescription Parameterized Constructor + * + * @param name Name of the snapshot + * @param table TableName associated with the snapshot + * @param type Type of the snapshot - enum SnapshotType + * @param owner Snapshot Owner + * @param creationTime Creation time for Snapshot + * @param version Snapshot Version + * @param snapshotProps Additional properties for snapshot e.g. TTL + */ public SnapshotDescription(String name, TableName table, SnapshotType type, String owner, - long creationTime, int version) { + long creationTime, int version, Map snapshotProps) { this.name = name; this.table = table; this.snapShotType = type; this.owner = owner; this.creationTime = creationTime; + this.ttl = getTtlFromSnapshotProps(snapshotProps); this.version = version; } + private long getTtlFromSnapshotProps(Map snapshotProps) { + return MapUtils.getLongValue(snapshotProps, "TTL", -1); + } + + /** + * SnapshotDescription Parameterized Constructor + * + * @param snapshotName Name of the snapshot + * @param tableName TableName associated with the snapshot + * @param type Type of the snapshot - enum SnapshotType + * @param snapshotProps Additional properties for snapshot e.g. TTL + */ + public SnapshotDescription(String snapshotName, TableName tableName, SnapshotType type, + Map snapshotProps) { + this(snapshotName, tableName, type, null, -1, -1, snapshotProps); + } + public String getName() { return this.name; } @@ -139,15 +173,27 @@ public class SnapshotDescription { return this.creationTime; } + // get snapshot ttl in sec + public long getTtl() { + return ttl; + } + public int getVersion() { return this.version; } @Override public String toString() { - return "SnapshotDescription: name = " + ((name != null) ? name : null) + "/table = " - + ((table != null) ? table : null) + " /owner = " + ((owner != null) ? owner : null) - + (creationTime != -1 ? ("/creationtime = " + creationTime) : "") - + (version != -1 ? ("/version = " + version) : ""); + return new StringBuilder("SnapshotDescription: ") + .append("name = ") + .append(name) + .append("/table = ") + .append(table) + .append(" /owner = ") + .append(owner) + .append(creationTime != -1 ? ("/creationtime = " + creationTime) : "") + .append(ttl != -1 ? ("/ttl = " + ttl) : "") + .append(version != -1 ? ("/version = " + version) : "") + .toString(); } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index 336c59cdc8c..b3e6e204cdf 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -29,6 +29,7 @@ import java.security.AccessController; import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -2933,12 +2934,15 @@ public final class ProtobufUtil { if (snapshotDesc.getCreationTime() != -1L) { builder.setCreationTime(snapshotDesc.getCreationTime()); } + if (snapshotDesc.getTtl() != -1L && + snapshotDesc.getTtl() < TimeUnit.MILLISECONDS.toSeconds(Long.MAX_VALUE)) { + builder.setTtl(snapshotDesc.getTtl()); + } if (snapshotDesc.getVersion() != -1) { builder.setVersion(snapshotDesc.getVersion()); } builder.setType(ProtobufUtil.createProtosSnapShotDescType(snapshotDesc.getType())); - SnapshotProtos.SnapshotDescription snapshot = builder.build(); - return snapshot; + return builder.build(); } /** @@ -2950,10 +2954,12 @@ public final class ProtobufUtil { */ public static SnapshotDescription createSnapshotDesc(SnapshotProtos.SnapshotDescription snapshotDesc) { + final Map snapshotProps = new HashMap<>(); + snapshotProps.put("TTL", snapshotDesc.getTtl()); return new SnapshotDescription(snapshotDesc.getName(), - snapshotDesc.hasTable() ? TableName.valueOf(snapshotDesc.getTable()) : null, - createSnapshotType(snapshotDesc.getType()), snapshotDesc.getOwner(), - snapshotDesc.getCreationTime(), snapshotDesc.getVersion()); + snapshotDesc.hasTable() ? TableName.valueOf(snapshotDesc.getTable()) : null, + createSnapshotType(snapshotDesc.getType()), snapshotDesc.getOwner(), + snapshotDesc.getCreationTime(), snapshotDesc.getVersion(), snapshotProps); } public static RegionLoadStats createRegionLoadStats(ClientProtos.RegionLoadStats stats) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java index 5a25c52bc0b..106836c50c6 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/snapshot/ClientSnapshotDescriptionUtils.java @@ -61,8 +61,15 @@ public class ClientSnapshotDescriptionUtils { if (ssd == null) { return null; } - return "{ ss=" + ssd.getName() + - " table=" + (ssd.hasTable()?TableName.valueOf(ssd.getTable()):"") + - " type=" + ssd.getType() + " }"; + return new StringBuilder("{ ss=") + .append(ssd.getName()) + .append(" table=") + .append(ssd.hasTable() ? TableName.valueOf(ssd.getTable()) : "") + .append(" type=") + .append(ssd.getType()) + .append(" ttl=") + .append(ssd.getTtl()) + .append(" }") + .toString(); } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 5b3c478809b..e1977d42929 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1475,6 +1475,15 @@ public final class HConstants { "hbase.util.default.lossycounting.errorrate"; public static final String NOT_IMPLEMENTED = "Not implemented"; + // Default TTL - FOREVER + public static final long DEFAULT_SNAPSHOT_TTL = 0; + + // User defined Default TTL config key + public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl"; + + public static final String SNAPSHOT_CLEANER_DISABLE = "hbase.master.cleaner.snapshot.disable"; + + private HConstants() { // Can't be instantiated with this ctor. } diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index 9135fdf204a..0ed4ab95253 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -1858,4 +1858,23 @@ possible configurations would overwhelm and obscure the important. Default is 5 minutes. Make it 30 seconds for tests. See HBASE-19794 for some context. + + hbase.master.cleaner.snapshot.interval + 1800000 + + Snapshot Cleanup chore interval in milliseconds. + The cleanup thread keeps running at this interval + to find all snapshots that are expired based on TTL + and delete them. + + + + hbase.master.snapshot.ttl + 0 + + Default Snapshot TTL to be considered when the user does not specify TTL while + creating snapshot. Default value 0 indicates FOREVERE - snapshot should not be + automatically deleted until it is manually deleted + + diff --git a/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto b/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto index 479e33ee542..5faa7ab822b 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Snapshot.proto @@ -44,6 +44,7 @@ message SnapshotDescription { optional int32 version = 5; optional string owner = 6; optional UsersAndPermissions users_and_permissions = 7; + optional int64 ttl = 8 [default = 0]; } message SnapshotFileInfo { diff --git a/hbase-protocol/src/main/protobuf/HBase.proto b/hbase-protocol/src/main/protobuf/HBase.proto index 0dcf363c56b..5cf509d4183 100644 --- a/hbase-protocol/src/main/protobuf/HBase.proto +++ b/hbase-protocol/src/main/protobuf/HBase.proto @@ -177,6 +177,7 @@ message SnapshotDescription { optional Type type = 4 [default = FLUSH]; optional int32 version = 5; optional string owner = 6; + optional int64 ttl = 7 [default = 0]; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index d450aba60f1..78cf549a5db 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -114,6 +114,7 @@ import org.apache.hadoop.hbase.master.cleaner.CleanerChore; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.cleaner.ReplicationBarrierCleaner; +import org.apache.hadoop.hbase.master.cleaner.SnapshotCleanerChore; import org.apache.hadoop.hbase.master.locking.LockManager; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType; @@ -375,6 +376,7 @@ public class HMaster extends HRegionServer implements MasterServices { private RegionNormalizerChore normalizerChore; private ClusterStatusChore clusterStatusChore; private ClusterStatusPublisher clusterStatusPublisherChore = null; + private SnapshotCleanerChore snapshotCleanerChore = null; CatalogJanitor catalogJanitorChore; private LogCleaner logCleaner; @@ -1434,6 +1436,16 @@ public class HMaster extends HRegionServer implements MasterServices { replicationPeerManager); getChoreService().scheduleChore(replicationBarrierCleaner); + final boolean isSnapshotChoreDisabled = conf.getBoolean(HConstants.SNAPSHOT_CLEANER_DISABLE, + false); + if (isSnapshotChoreDisabled) { + if (LOG.isTraceEnabled()) { + LOG.trace("Snapshot Cleaner Chore is disabled. Not starting up the chore.."); + } + } else { + this.snapshotCleanerChore = new SnapshotCleanerChore(this, conf, getSnapshotManager()); + getChoreService().scheduleChore(this.snapshotCleanerChore); + } serviceStarted = true; if (LOG.isTraceEnabled()) { LOG.trace("Started service threads"); @@ -1551,6 +1563,7 @@ public class HMaster extends HRegionServer implements MasterServices { choreService.cancelChore(this.logCleaner); choreService.cancelChore(this.hfileCleaner); choreService.cancelChore(this.replicationBarrierCleaner); + choreService.cancelChore(this.snapshotCleanerChore); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java new file mode 100644 index 00000000000..7e12acd8394 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/SnapshotCleanerChore.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.cleaner; + + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ScheduledChore; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + +/** + * This chore, every time it runs, will try to delete snapshots that are expired based on TTL in + * seconds configured for each Snapshot + */ +@InterfaceAudience.Private +public class SnapshotCleanerChore extends ScheduledChore { + + private static final Logger LOG = LoggerFactory.getLogger(SnapshotCleanerChore.class); + private static final String SNAPSHOT_CLEANER_CHORE_NAME = "SnapshotCleaner"; + private static final String SNAPSHOT_CLEANER_INTERVAL = "hbase.master.cleaner.snapshot.interval"; + private static final int SNAPSHOT_CLEANER_DEFAULT_INTERVAL = 1800 * 1000; // Default 30 min + private static final String DELETE_SNAPSHOT_EVENT = + "Eligible Snapshot for cleanup due to expired TTL."; + + private final SnapshotManager snapshotManager; + + /** + * Construct Snapshot Cleaner Chore with parameterized constructor + * + * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup + * @param configuration The configuration to set + * @param snapshotManager SnapshotManager instance to manage lifecycle of snapshot + */ + public SnapshotCleanerChore(Stoppable stopper, Configuration configuration, + SnapshotManager snapshotManager) { + super(SNAPSHOT_CLEANER_CHORE_NAME, stopper, configuration.getInt(SNAPSHOT_CLEANER_INTERVAL, + SNAPSHOT_CLEANER_DEFAULT_INTERVAL)); + this.snapshotManager = snapshotManager; + } + + @Override + protected void chore() { + if (LOG.isTraceEnabled()) { + LOG.trace("Snapshot Cleaner Chore is starting up..."); + } + try { + List completedSnapshotsList = + this.snapshotManager.getCompletedSnapshots(); + for (SnapshotProtos.SnapshotDescription snapshotDescription : completedSnapshotsList) { + long snapshotCreatedTime = snapshotDescription.getCreationTime(); + long snapshotTtl = snapshotDescription.getTtl(); + /* + * Backward compatibility after the patch deployment on HMaster + * Any snapshot with ttl 0 is to be considered as snapshot to keep FOREVER + * Default ttl value specified by {@HConstants.DEFAULT_SNAPSHOT_TTL} + */ + if (snapshotCreatedTime > 0 && snapshotTtl > 0 && + snapshotTtl < TimeUnit.MILLISECONDS.toSeconds(Long.MAX_VALUE)) { + long currentTime = EnvironmentEdgeManager.currentTime(); + if ((snapshotCreatedTime + TimeUnit.SECONDS.toMillis(snapshotTtl)) < currentTime) { + LOG.info("Event: {} Name: {}, CreatedTime: {}, TTL: {}, currentTime: {}", + DELETE_SNAPSHOT_EVENT, snapshotDescription.getName(), snapshotCreatedTime, + snapshotTtl, currentTime); + deleteExpiredSnapshot(snapshotDescription); + } + } + } + } catch (IOException e) { + LOG.error("Error while cleaning up Snapshots...", e); + } + if (LOG.isTraceEnabled()) { + LOG.trace("Snapshot Cleaner Chore is closing..."); + } + } + + private void deleteExpiredSnapshot(SnapshotProtos.SnapshotDescription snapshotDescription) { + try { + this.snapshotManager.deleteSnapshot(snapshotDescription); + } catch (Exception e) { + LOG.error("Error while deleting Snapshot: {}", snapshotDescription.getName(), e); + } + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java index 5c25526a0d3..bbe2042d884 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.snapshot; import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.Collections; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -124,6 +125,8 @@ public final class SnapshotDescriptionUtils { /** Default value if no start time is specified */ public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0; + // Default value if no ttl is specified for Snapshot + private static final long NO_SNAPSHOT_TTL_SPECIFIED = 0; public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis"; @@ -317,6 +320,22 @@ public final class SnapshotDescriptionUtils { snapshot = builder.build(); } + long ttl = snapshot.getTtl(); + // set default ttl(sec) if it is not set already or the value is out of the range + if (ttl == SnapshotDescriptionUtils.NO_SNAPSHOT_TTL_SPECIFIED || + ttl > TimeUnit.MILLISECONDS.toSeconds(Long.MAX_VALUE)) { + final long defaultSnapshotTtl = conf.getLong(HConstants.DEFAULT_SNAPSHOT_TTL_CONFIG_KEY, + HConstants.DEFAULT_SNAPSHOT_TTL); + if (LOG.isDebugEnabled()) { + LOG.debug("Snapshot current TTL value: {} resetting it to default value: {}", ttl, + defaultSnapshotTtl); + } + ttl = defaultSnapshotTtl; + } + SnapshotDescription.Builder builder = snapshot.toBuilder(); + builder.setTtl(ttl); + snapshot = builder.build(); + // set the acl to snapshot if security feature is enabled. if (isSecurityAvailable(conf)) { snapshot = writeAclToSnapshotDescription(snapshot, conf); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java index 85a45ee45eb..78c70505109 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java @@ -374,12 +374,12 @@ public final class SnapshotInfo extends AbstractHBaseTool { // List Available Snapshots if (listSnapshots) { SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); - System.out.printf("%-20s | %-20s | %s%n", "SNAPSHOT", "CREATION TIME", "TABLE NAME"); + System.out.printf("%-20s | %-20s | %-20s | %s%n", "SNAPSHOT", "CREATION TIME", "TTL IN SEC", + "TABLE NAME"); for (SnapshotDescription desc: getSnapshotList(conf)) { - System.out.printf("%-20s | %20s | %s%n", - desc.getName(), - df.format(new Date(desc.getCreationTime())), - desc.getTableNameAsString()); + System.out.printf("%-20s | %20s | %20s | %s%n", desc.getName(), + df.format(new Date(desc.getCreationTime())), desc.getTtl(), + desc.getTableNameAsString()); } return 0; } @@ -432,6 +432,7 @@ public final class SnapshotInfo extends AbstractHBaseTool { System.out.println(" Table: " + snapshotDesc.getTable()); System.out.println(" Format: " + snapshotDesc.getVersion()); System.out.println("Created: " + df.format(new Date(snapshotDesc.getCreationTime()))); + System.out.println(" Ttl: " + snapshotDesc.getTtl()); System.out.println(" Owner: " + snapshotDesc.getOwner()); System.out.println(); } diff --git a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp index 1ea2b40a4e5..fc75ca773d8 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/snapshot.jsp @@ -36,6 +36,7 @@ SnapshotInfo.SnapshotStats stats = null; TableName snapshotTable = null; boolean tableExists = false; + long snapshotTtl = 0; if(snapshotName != null && master.isInitialized()) { try (Admin admin = master.getConnection().getAdmin()) { for (SnapshotDescription snapshotDesc: admin.listSnapshots()) { @@ -43,6 +44,7 @@ snapshot = snapshotDesc; stats = SnapshotInfo.getSnapshotStats(conf, snapshot); snapshotTable = snapshot.getTableName(); + snapshotTtl = snapshot.getTtl(); tableExists = admin.tableExists(snapshotTable); break; } @@ -91,6 +93,7 @@ Table Creation Time + Time To Live(Sec) Type Format Version State @@ -106,6 +109,13 @@ <% } %> <%= new Date(snapshot.getCreationTime()) %> + + <% if (snapshotTtl == 0) { %> + FOREVER + <% } else { %> + <%= snapshotTtl %> + <% } %> + <%= snapshot.getType() %> <%= snapshot.getVersion() %> <% if (stats.isSnapshotCorrupted()) { %> diff --git a/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp b/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp index 719ab4f2203..d270eb87ed4 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/snapshotsStats.jsp @@ -65,6 +65,7 @@ Snapshot Name Table Creation Time + TTL(Sec) Shared Storefile Size Mob Storefile Size Archived Storefile Size @@ -82,6 +83,13 @@ <%= snapshotTable.getNameAsString() %> <%= new Date(snapshotDesc.getCreationTime()) %> + + <% if (snapshotDesc.getTtl() == 0) { %> + FOREVER + <% } else { %> + <%= snapshotDesc.getTtl() %> + <% } %> + <%= StringUtils.humanReadableInt(stats.getSharedStoreFilesSize()) %> <%= StringUtils.humanReadableInt(stats.getMobStoreFilesSize()) %> <%= StringUtils.humanReadableInt(stats.getArchivedStoreFileSize()) %> diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java index b46404f0e28..d2a0eae08b2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java @@ -228,7 +228,7 @@ public class TestSnapshotFromClient { byte[] snapshot = Bytes.toBytes(SNAPSHOT_NAME); admin.snapshot(new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, - SnapshotType.DISABLED, null, -1, SnapshotManifestV1.DESCRIPTOR_VERSION)); + SnapshotType.DISABLED, null, -1, SnapshotManifestV1.DESCRIPTOR_VERSION, null)); LOG.debug("Snapshot completed."); // make sure we have the snapshot diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java index 6da06aca5ae..c5902156b8c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSnapshotTemporaryDirectory.java @@ -473,9 +473,8 @@ public class TestSnapshotTemporaryDirectory { private void takeSnapshot(TableName tableName, String snapshotName, boolean disabled) throws IOException { SnapshotType type = disabled ? SnapshotType.DISABLED : SnapshotType.FLUSH; - SnapshotDescription desc = - new SnapshotDescription(snapshotName, tableName.getNameAsString(), type, null, -1, - manifestVersion); + SnapshotDescription desc = new SnapshotDescription(snapshotName, tableName, type, null, -1, + manifestVersion, null); admin.snapshot(desc); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java new file mode 100644 index 00000000000..e05213d1c87 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotCleanerChore.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; + + +/** + * Tests for SnapshotsCleanerChore + */ +@Category({MasterTests.class, SmallTests.class}) +public class TestSnapshotCleanerChore { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestSnapshotCleanerChore.class); + + private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotCleanerChore.class); + + private static final HBaseTestingUtility HBASE_TESTING_UTILITY = new HBaseTestingUtility(); + + private SnapshotManager snapshotManager; + + private Configuration getSnapshotCleanerConf() { + Configuration conf = HBASE_TESTING_UTILITY.getConfiguration(); + conf.setInt("hbase.master.cleaner.snapshot.interval", 100); + return conf; + } + + + @Test + public void testSnapshotCleanerWithoutAnyCompletedSnapshot() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + try { + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithNoTtlExpired() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + List snapshotDescriptionList = new ArrayList<>(); + snapshotDescriptionList.add(getSnapshotDescription(-2, "snapshot01", "table01", + EnvironmentEdgeManager.currentTime() - 100000)); + snapshotDescriptionList.add(getSnapshotDescription(10, "snapshot02", "table02", + EnvironmentEdgeManager.currentTime())); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); + try { + LOG.info("2 Snapshots are completed but TTL is not expired for any of them"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithSomeTtlExpired() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + conf.setStrings("hbase.master.cleaner.snapshot.disable", "false"); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + List snapshotDescriptionList = new ArrayList<>(); + snapshotDescriptionList.add(getSnapshotDescription(10, "snapshot01", "table01", 1)); + snapshotDescriptionList.add(getSnapshotDescription(5, "snapshot02", "table02", 2)); + snapshotDescriptionList.add(getSnapshotDescription(30, "snapshot01", "table01", + EnvironmentEdgeManager.currentTime())); + snapshotDescriptionList.add(getSnapshotDescription(0, "snapshot02", "table02", + EnvironmentEdgeManager.currentTime())); + snapshotDescriptionList.add(getSnapshotDescription(40, "snapshot03", "table03", + EnvironmentEdgeManager.currentTime())); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); + try { + LOG.info("5 Snapshots are completed. TTL is expired for 2 them. Going to delete them"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(2)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotCleanerWithReadIOE() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = new HBaseTestingUtility().getConfiguration(); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenThrow(IOException.class); + try { + LOG.info("While getting completed Snapshots, IOException would occur. Hence, No Snapshot" + + " should be deleted"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(0)).deleteSnapshot(Mockito.any()); + } + + @Test + public void testSnapshotChoreWithTtlOutOfRange() throws IOException { + snapshotManager = Mockito.mock(SnapshotManager.class); + Stoppable stopper = new StoppableImplementation(); + Configuration conf = getSnapshotCleanerConf(); + List snapshotDescriptionList = new ArrayList<>(); + snapshotDescriptionList.add(getSnapshotDescription(Long.MAX_VALUE, "snapshot01", "table01", 1)); + snapshotDescriptionList.add(getSnapshotDescription(5, "snapshot02", "table02", 2)); + Mockito.when(snapshotManager.getCompletedSnapshots()).thenReturn(snapshotDescriptionList); + SnapshotCleanerChore snapshotCleanerChore = + new SnapshotCleanerChore(stopper, conf, snapshotManager); + try { + LOG.info("Snapshot Chore is disabled. No cleanup performed for Expired Snapshots"); + snapshotCleanerChore.chore(); + } finally { + stopper.stop("Stopping Test Stopper"); + } + Mockito.verify(snapshotManager, Mockito.times(1)).getCompletedSnapshots(); + } + + private SnapshotProtos.SnapshotDescription getSnapshotDescription(final long ttl, + final String snapshotName, final String tableName, final long snapshotCreationTime) { + SnapshotProtos.SnapshotDescription.Builder snapshotDescriptionBuilder = + SnapshotProtos.SnapshotDescription.newBuilder(); + snapshotDescriptionBuilder.setTtl(ttl); + snapshotDescriptionBuilder.setName(snapshotName); + snapshotDescriptionBuilder.setTable(tableName); + snapshotDescriptionBuilder.setType(SnapshotProtos.SnapshotDescription.Type.FLUSH); + snapshotDescriptionBuilder.setCreationTime(snapshotCreationTime); + return snapshotDescriptionBuilder.build(); + } + + /** + * Simple helper class that just keeps track of whether or not its stopped. + */ + private static class StoppableImplementation implements Stoppable { + + private volatile boolean stop = false; + + @Override + public void stop(String why) { + this.stop = true; + } + + @Override + public boolean isStopped() { + return this.stop; + } + + } + +} \ No newline at end of file diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb index ddce0d985da..65a60613300 100644 --- a/hbase-shell/src/main/ruby/hbase/admin.rb +++ b/hbase-shell/src/main/ruby/hbase/admin.rb @@ -1065,11 +1065,15 @@ module Hbase @admin.snapshot(snapshot_name, table_name) else args.each do |arg| + ttl = arg[TTL] + ttl = ttl ? ttl.to_java(:long) : -1 + snapshot_props = java.util.HashMap.new + snapshot_props.put("TTL", ttl) if arg[SKIP_FLUSH] == true @admin.snapshot(snapshot_name, table_name, - org.apache.hadoop.hbase.client.SnapshotType::SKIPFLUSH) + org.apache.hadoop.hbase.client.SnapshotType::SKIPFLUSH, snapshot_props) else - @admin.snapshot(snapshot_name, table_name) + @admin.snapshot(snapshot_name, table_name, snapshot_props) end end end diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc index 2bc7a7fc23c..7a5fc255f18 100644 --- a/src/main/asciidoc/_chapters/hbase-default.adoc +++ b/src/main/asciidoc/_chapters/hbase-default.adoc @@ -2131,3 +2131,33 @@ The percent of region server RPC threads failed to abort RS. .Default `0.5` + +[[hbase.master.cleaner.snapshot.interval]] +*`hbase.master.cleaner.snapshot.interval`*:: ++ +.Description + + Snapshot Cleanup chore interval in milliseconds. + The cleanup thread keeps running at this interval + to find all snapshots that are expired based on TTL + and delete them. + ++ +.Default +`1800000` + + +[[hbase.master.snapshot.ttl]] +*`hbase.master.snapshot.ttl`*:: ++ +.Description + + Default Snapshot TTL to be considered when the user + does not specify TTL while creating snapshot. + Default value 0 indicates FOREVERE - snapshot should not be + automatically deleted until it is manually deleted + + ++ +.Default +`0` diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 878d1f33834..e6668d245e3 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -2726,6 +2726,47 @@ A snapshot is only a representation of a table during a window of time. The amount of time the snapshot operation will take to reach each Region Server may vary from a few seconds to a minute, depending on the resource load and speed of the hardware or network, among other factors. There is also no way to know whether a given insert or update is in memory or has been flushed. + +.Take a Snapshot With TTL +Snapshots have a lifecycle that is independent from the table from which they are created. +Although data in a table may be stored with TTL the data files containing them become +frozen by the snapshot. Space consumed by expired cells will not be reclaimed by normal +table housekeeping like compaction. While this is expected it can be inconvenient at scale. +When many snapshots are under management and the data in various tables is expired by +TTL some notion of optional TTL (and optional default TTL) for snapshots could be useful. + + +---- +hbase> snapshot 'mytable', 'snapshot1234', {TTL => 86400} +---- + +The above command creates snapshot `snapshot1234` with TTL of 86400 sec(24 hours) +and hence, the snapshot is supposed to be cleaned up after 24 hours + + + +.Default Snapshot TTL: + +- FOREVER by default +- User specified Default TTL with config `hbase.master.snapshot.ttl` + + +While creating a Snapshot, if TTL in seconds is not specified, by default the snapshot +would not be deleted automatically. i.e. it would be retained forever until it is +manually deleted. However, the user can update this default TTL behavior by +providing default TTL in sec for key: `hbase.master.snapshot.ttl`. +Value 0 for this config indicates TTL: FOREVER + + + + +At any point of time, if Snapshot cleanup is supposed to be stopped due to +some snapshot restore activity, it is advisable to disable Snapshot Cleaner with + config: + +`hbase.master.cleaner.snapshot.disable`: "true" + + [[ops.snapshots.list]] === Listing Snapshots