From e95cf479c7615ae160a6ba963cc7689f3b440efd Mon Sep 17 00:00:00 2001
From: tedyu
Date: Fri, 21 Apr 2017 16:15:07 -0700
Subject: [PATCH] HBASE-16314 Retry on table snapshot failure during full backup (Vladimir Rodionov)

---
 .../hbase/backup/BackupRestoreConstants.java  |  9 +++
 .../backup/impl/FullTableBackupClient.java    | 54 ++++++++++++++++--
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
index e46904b656c..d1ab2464821 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
@@ -37,6 +37,15 @@ public interface BackupRestoreConstants {
   public final static int BACKUP_SYSTEM_TTL_DEFAULT = HConstants.FOREVER;
   public final static String BACKUP_ENABLE_KEY = "hbase.backup.enable";
   public final static boolean BACKUP_ENABLE_DEFAULT = false;
+
+  /** Maximum number of attempts made to snapshot a table during a full backup. */
+  public static final String BACKUP_MAX_ATTEMPTS_KEY = "hbase.backup.attempts.max";
+  public static final int DEFAULT_BACKUP_MAX_ATTEMPTS = 10;
+
+  /** Pause, in milliseconds, between two consecutive table snapshot attempts. */
+  public static final String BACKUP_ATTEMPTS_PAUSE_MS_KEY = "hbase.backup.attempts.pause.ms";
+  public static final int DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS = 10000;
+
   /*
    * Drivers option list
    */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
index 77d11844295..ee7a84140eb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/FullTableBackupClient.java
@@ -18,6 +18,12 @@
 
 package org.apache.hadoop.hbase.backup.impl;
 
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_ATTEMPTS_PAUSE_MS_KEY;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.BACKUP_MAX_ATTEMPTS_KEY;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS;
+import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.DEFAULT_BACKUP_MAX_ATTEMPTS;
+
 import java.io.IOException;
+import java.io.InterruptedIOException;
 import java.util.HashMap;
 import java.util.List;
@@ -148,8 +154,7 @@ public class FullTableBackupClient extends TableBackupClient {
             "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_"
                 + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
 
-        admin.snapshot(snapshotName, tableName);
-
+        snapshotTable(admin, tableName, snapshotName);
         backupInfo.setSnapshotName(tableName, snapshotName);
       }
 
@@ -186,4 +191,49 @@ public class FullTableBackupClient extends TableBackupClient {
 
   }
 
+  /**
+   * Takes a snapshot of the given table, retrying when an attempt fails with an IOException.
+   * The attempt limit and the pause between attempts are read from the configuration; at least
+   * one attempt is always made.
+   * @param admin admin used to request the snapshot
+   * @param tableName table to snapshot
+   * @param snapshotName name of the snapshot to create
+   * @throws IOException if all attempts fail (the last failure is attached as the cause) or the
+   *           thread is interrupted while pausing between attempts
+   */
+  private void snapshotTable(Admin admin, TableName tableName, String snapshotName)
+      throws IOException {
+    // Clamp the configured values so a bad setting cannot skip the snapshot or break sleep().
+    int maxAttempts =
+        Math.max(1, conf.getInt(BACKUP_MAX_ATTEMPTS_KEY, DEFAULT_BACKUP_MAX_ATTEMPTS));
+    int pause =
+        Math.max(0, conf.getInt(BACKUP_ATTEMPTS_PAUSE_MS_KEY, DEFAULT_BACKUP_ATTEMPTS_PAUSE_MS));
+    IOException lastFailure = null;
+
+    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
+      try {
+        admin.snapshot(snapshotName, tableName);
+        return;
+      } catch (IOException ioe) {
+        lastFailure = ioe;
+        boolean retry = attempt < maxAttempts;
+        LOG.warn("Snapshot attempt " + attempt + " failed for table " + tableName
+            + (retry ? ", sleeping for " + pause + "ms" : ", no attempts left"), ioe);
+        if (retry) {
+          try {
+            Thread.sleep(pause);
+          } catch (InterruptedException ie) {
+            // Restore the interrupt flag and report the interruption as such.
+            Thread.currentThread().interrupt();
+            InterruptedIOException iioe = new InterruptedIOException(
+                "Interrupted while waiting to retry snapshot of table " + tableName);
+            iioe.initCause(ioe);
+            throw iioe;
+          }
+        }
+      }
+    }
+    throw new IOException("Failed to snapshot table " + tableName + " after " + maxAttempts
+        + " attempts", lastFailure);
+  }
 }