From 86e5db50992e9b297014e9c9824429e1cefb824b Mon Sep 17 00:00:00 2001 From: Matteo Bertozzi Date: Mon, 2 Jun 2014 08:03:19 +0100 Subject: [PATCH] HBASE-10935 support snapshot policy where flush memstore can be skipped to prevent production cluster freeze (Tianying Chang) --- .../hadoop/hbase/client/HBaseAdmin.java | 20 +++++++++ .../hbase/protobuf/generated/HBaseProtos.java | 44 ++++++++++++------- hbase-protocol/src/main/protobuf/HBase.proto | 1 + .../snapshot/FlushSnapshotSubprocedure.java | 26 +++++++++-- .../snapshot/RegionServerSnapshotManager.java | 13 ++++++ .../snapshot/TestFlushSnapshotFromClient.java | 43 ++++++++++++++++++ hbase-shell/src/main/ruby/hbase.rb | 1 + hbase-shell/src/main/ruby/hbase/admin.rb | 15 ++++++- .../src/main/ruby/shell/commands/snapshot.rb | 6 +-- 9 files changed, 144 insertions(+), 25 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index c6b11fd6760..95b5c072b36 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -2626,6 +2626,26 @@ public class HBaseAdmin implements Admin { SnapshotDescription.Type.FLUSH); } + /** + * Create snapshot for the given table of given flush type. + *

+ * Snapshots are considered unique based on the name of the snapshot. Attempts to take a + * snapshot with the same name (even a different type or with different parameters) will fail with + * a {@link SnapshotCreationException} indicating the duplicate naming. + *

+ * Snapshot names follow the same naming constraints as tables in HBase. + * @param snapshotName name of the snapshot to be created + * @param tableName name of the table for which the snapshot is created + * @param flushType snapshot type, e.g. SKIPFLUSH to snapshot without flushing the memstore first + * @throws IOException if a remote or network exception occurs + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + public void snapshot(final byte[] snapshotName, final byte[] tableName, + final SnapshotDescription.Type flushType) throws + IOException, SnapshotCreationException, IllegalArgumentException { + snapshot(Bytes.toString(snapshotName), Bytes.toString(tableName), flushType); + } /** public void snapshot(final String snapshotName, * Create a timestamp consistent snapshot for the given table. diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java index 238db310766..9c0447eccf1 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java @@ -10476,6 +10476,10 @@ public final class HBaseProtos { * FLUSH = 1; */ FLUSH(1, 1), + /** + * SKIPFLUSH = 2; + */ + SKIPFLUSH(2, 2), ; /** @@ -10486,6 +10490,10 @@ public final class HBaseProtos { * FLUSH = 1; */ public static final int FLUSH_VALUE = 1; + /** + * SKIPFLUSH = 2; + */ + public static final int SKIPFLUSH_VALUE = 2; public final int getNumber() { return value; } @@ -10494,6 +10502,7 @@ public final class HBaseProtos { switch (value) { case 0: return DISABLED; case 1: return FLUSH; + case 2: return SKIPFLUSH; default: return null; } } @@ -16241,26 +16250,27 @@ public final class HBaseProtos { ",\n\rNameBytesPair\022\014\n\004name\030\001 
\002(\t\022\r\n\005value\030" + "\002 \001(\014\"/\n\016BytesBytesPair\022\r\n\005first\030\001 \002(\014\022\016" + "\n\006second\030\002 \002(\014\",\n\rNameInt64Pair\022\014\n\004name\030" + - "\001 \001(\t\022\r\n\005value\030\002 \001(\003\"\256\001\n\023SnapshotDescrip" + + "\001 \001(\t\022\r\n\005value\030\002 \001(\003\"\275\001\n\023SnapshotDescrip" + "tion\022\014\n\004name\030\001 \002(\t\022\r\n\005table\030\002 \001(\t\022\030\n\rcre" + "ation_time\030\003 \001(\003:\0010\022.\n\004type\030\004 \001(\0162\031.Snap" + "shotDescription.Type:\005FLUSH\022\017\n\007version\030\005" + - " \001(\005\"\037\n\004Type\022\014\n\010DISABLED\020\000\022\t\n\005FLUSH\020\001\"}\n", - "\024ProcedureDescription\022\021\n\tsignature\030\001 \002(\t" + - "\022\020\n\010instance\030\002 \001(\t\022\030\n\rcreation_time\030\003 \001(" + - "\003:\0010\022&\n\rconfiguration\030\004 \003(\0132\017.NameString" + - "Pair\"\n\n\010EmptyMsg\"\033\n\007LongMsg\022\020\n\010long_msg\030" + - "\001 \002(\003\"\037\n\tDoubleMsg\022\022\n\ndouble_msg\030\001 \002(\001\"\'" + - "\n\rBigDecimalMsg\022\026\n\016bigdecimal_msg\030\001 \002(\014\"" + - "5\n\004UUID\022\026\n\016least_sig_bits\030\001 \002(\004\022\025\n\rmost_" + - "sig_bits\030\002 \002(\004\"K\n\023NamespaceDescriptor\022\014\n" + - "\004name\030\001 \002(\014\022&\n\rconfiguration\030\002 \003(\0132\017.Nam" + - "eStringPair\"$\n\020RegionServerInfo\022\020\n\010infoP", - "ort\030\001 \001(\005*r\n\013CompareType\022\010\n\004LESS\020\000\022\021\n\rLE" + - "SS_OR_EQUAL\020\001\022\t\n\005EQUAL\020\002\022\r\n\tNOT_EQUAL\020\003\022" + - "\024\n\020GREATER_OR_EQUAL\020\004\022\013\n\007GREATER\020\005\022\t\n\005NO" + - "_OP\020\006B>\n*org.apache.hadoop.hbase.protobu" + - "f.generatedB\013HBaseProtosH\001\240\001\001" + " \001(\005\".\n\004Type\022\014\n\010DISABLED\020\000\022\t\n\005FLUSH\020\001\022\r\n", + "\tSKIPFLUSH\020\002\"}\n\024ProcedureDescription\022\021\n\t" + + "signature\030\001 
\002(\t\022\020\n\010instance\030\002 \001(\t\022\030\n\rcre" + + "ation_time\030\003 \001(\003:\0010\022&\n\rconfiguration\030\004 \003" + + "(\0132\017.NameStringPair\"\n\n\010EmptyMsg\"\033\n\007LongM" + + "sg\022\020\n\010long_msg\030\001 \002(\003\"\037\n\tDoubleMsg\022\022\n\ndou" + + "ble_msg\030\001 \002(\001\"\'\n\rBigDecimalMsg\022\026\n\016bigdec" + + "imal_msg\030\001 \002(\014\"5\n\004UUID\022\026\n\016least_sig_bits" + + "\030\001 \002(\004\022\025\n\rmost_sig_bits\030\002 \002(\004\"K\n\023Namespa" + + "ceDescriptor\022\014\n\004name\030\001 \002(\014\022&\n\rconfigurat" + + "ion\030\002 \003(\0132\017.NameStringPair\"$\n\020RegionServ", + "erInfo\022\020\n\010infoPort\030\001 \001(\005*r\n\013CompareType\022" + + "\010\n\004LESS\020\000\022\021\n\rLESS_OR_EQUAL\020\001\022\t\n\005EQUAL\020\002\022" + + "\r\n\tNOT_EQUAL\020\003\022\024\n\020GREATER_OR_EQUAL\020\004\022\013\n\007" + + "GREATER\020\005\022\t\n\005NO_OP\020\006B>\n*org.apache.hadoo" + + "p.hbase.protobuf.generatedB\013HBaseProtosH" + + "\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { diff --git a/hbase-protocol/src/main/protobuf/HBase.proto b/hbase-protocol/src/main/protobuf/HBase.proto index 5622735b7b7..3e3d570d78c 100644 --- a/hbase-protocol/src/main/protobuf/HBase.proto +++ b/hbase-protocol/src/main/protobuf/HBase.proto @@ -159,6 +159,7 @@ message SnapshotDescription { enum Type { DISABLED = 0; FLUSH = 1; + SKIPFLUSH = 2; } optional Type type = 4 [default = FLUSH]; optional int32 version = 5; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java index a7a5186ad09..5a2c113a629 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java @@ -48,6 +48,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure { private final List regions; private final SnapshotDescription snapshot; private final SnapshotSubprocedurePool taskManager; + private boolean snapshotSkipFlush = false; public FlushSnapshotSubprocedure(ProcedureMember member, ForeignExceptionDispatcher errorListener, long wakeFrequency, long timeout, @@ -55,6 +56,10 @@ public class FlushSnapshotSubprocedure extends Subprocedure { SnapshotSubprocedurePool taskManager) { super(member, snapshot.getName(), errorListener, wakeFrequency, timeout); this.snapshot = snapshot; + + if (this.snapshot.getType() == SnapshotDescription.Type.SKIPFLUSH) { + snapshotSkipFlush = true; + } this.regions = regions; this.taskManager = taskManager; } @@ -78,10 +83,25 @@ public class FlushSnapshotSubprocedure extends Subprocedure { LOG.debug("Starting region operation on " + region); region.startRegionOperation(); try { - LOG.debug("Flush Snapshotting region " + region.toString() + " started..."); - region.flushcache(); + if (snapshotSkipFlush) { + /* + * This takes an online snapshot without forcing a coordinated flush, to prevent a pause. + * The snapshot type is defined inside the snapshot description. FlushSnapshotSubprocedure + * should be renamed to distributedSnapshotSubprocedure, and the flush() behavior can be + * turned on/off based on the flush type. + * To minimize the code change, the class name is not changed. + */ + LOG.debug("take snapshot without flush memstore first"); + } else { + LOG.debug("Flush Snapshotting region " + region.toString() + " started..."); + region.flushcache(); + } region.addRegionToSnapshot(snapshot, monitor); - LOG.debug("... 
Flush Snapshotting region " + region.toString() + " completed."); + if (snapshotSkipFlush) { + LOG.debug("... SkipFlush Snapshotting region " + region.toString() + " completed."); + } else { + LOG.debug("... Flush Snapshotting region " + region.toString() + " completed."); + } } finally { LOG.debug("Closing region operation on " + region); region.closeRegionOperation(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java index 4a4ee79931c..e78d6902fa9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java @@ -186,6 +186,19 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager { new SnapshotSubprocedurePool(rss.getServerName().toString(), conf); return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis, timeoutMillis, involvedRegions, snapshot, taskManager); + case SKIPFLUSH: + /* + * This takes an online snapshot without forcing a coordinated flush, to prevent a pause. + * The snapshot type is defined inside the snapshot description. FlushSnapshotSubprocedure + * should be renamed to distributedSnapshotSubprocedure, and the flush() behavior can be + * turned on/off based on the flush type. + * To minimize the code change, the class name is not changed. 
+ */ + SnapshotSubprocedurePool taskManager2 = + new SnapshotSubprocedurePool(rss.getServerName().toString(), conf); + return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis, + timeoutMillis, involvedRegions, snapshot, taskManager2); + default: throw new UnsupportedOperationException("Unrecognized snapshot type:" + snapshot.getType()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java index f0e7985caab..fd69d626134 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java @@ -175,6 +175,49 @@ public class TestFlushSnapshotFromClient { admin, fs); } + /** + * Test snapshotting a table that is online without flushing + * @throws Exception + */ + @Test(timeout=30000) + public void testSkipFlushTableSnapshot() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + + // put some stuff in the table + HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME); + UTIL.loadTable(table, TEST_FAM); + + LOG.debug("FS state before snapshot:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + + // take a snapshot of the enabled table + String snapshotString = "skipFlushTableSnapshot"; + byte[] snapshot = Bytes.toBytes(snapshotString); + admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.SKIPFLUSH); + LOG.debug("Snapshot completed."); + + // make sure we have the snapshot + List snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, + snapshot, TABLE_NAME); + + // make sure its a valid snapshot + FileSystem fs = 
UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem(); + Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); + LOG.debug("FS state after snapshot:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + + SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir, + admin, fs); + + admin.deleteSnapshot(snapshot); + snapshots = admin.listSnapshots(); + SnapshotTestingUtils.assertNoSnapshots(admin); + } + /** * Test simple flush snapshotting a table that is online diff --git a/hbase-shell/src/main/ruby/hbase.rb b/hbase-shell/src/main/ruby/hbase.rb index 3c09c4dee6d..fcd11fcb495 100644 --- a/hbase-shell/src/main/ruby/hbase.rb +++ b/hbase-shell/src/main/ruby/hbase.rb @@ -61,6 +61,7 @@ module HBaseConstants ATTRIBUTES="ATTRIBUTES" VISIBILITY="VISIBILITY" AUTHORIZATIONS = "AUTHORIZATIONS" + SKIP_FLUSH = 'SKIP_FLUSH' # Load constants from hbase java API def self.promote_constants(constants) diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb index 3d975016099..43ccad37adc 100644 --- a/hbase-shell/src/main/ruby/hbase/admin.rb +++ b/hbase-shell/src/main/ruby/hbase/admin.rb @@ -22,6 +22,7 @@ java_import java.util.Arrays java_import org.apache.hadoop.hbase.util.Pair java_import org.apache.hadoop.hbase.util.RegionSplitter java_import org.apache.hadoop.hbase.util.Bytes +java_import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos::SnapshotDescription # Wrapper for org.apache.hadoop.hbase.client.HBaseAdmin @@ -718,8 +719,18 @@ module Hbase #---------------------------------------------------------------------------------------------- # Take a snapshot of specified table - def snapshot(table, snapshot_name) - @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes) + def snapshot(table, snapshot_name, *args) + if args.empty? 
+ @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes) + else + args.each do |arg| + if arg[SKIP_FLUSH] == true + @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes, SnapshotDescription::Type::SKIPFLUSH) + else + @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes) + end + end + end end #---------------------------------------------------------------------------------------------- diff --git a/hbase-shell/src/main/ruby/shell/commands/snapshot.rb b/hbase-shell/src/main/ruby/shell/commands/snapshot.rb index 62de845fa31..15bf298e043 100644 --- a/hbase-shell/src/main/ruby/shell/commands/snapshot.rb +++ b/hbase-shell/src/main/ruby/shell/commands/snapshot.rb @@ -24,13 +24,13 @@ module Shell Take a snapshot of specified table. Examples: hbase> snapshot 'sourceTable', 'snapshotName' - hbase> snapshot 'namespace:sourceTable', 'snapshotName' + hbase> snapshot 'namespace:sourceTable', 'snapshotName', {SKIP_FLUSH => true} EOF end - def command(table, snapshot_name) + def command(table, snapshot_name, *args) format_simple_command do - admin.snapshot(table, snapshot_name) + admin.snapshot(table, snapshot_name, *args) end end end