From 97acb9ef24094042548a2981bdd99767156caeb3 Mon Sep 17 00:00:00 2001 From: Andrew Purtell Date: Fri, 24 Oct 2014 14:17:46 -0700 Subject: [PATCH] HBASE-11964 Improve spreading replication load from failed regionservers --- .../hbase/replication/regionserver/ReplicationSource.java | 7 ++++--- .../replication/regionserver/ReplicationSourceManager.java | 3 ++- .../hadoop/hbase/replication/TestReplicationBase.java | 2 ++ .../regionserver/TestReplicationSourceManager.java | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java index 04a2ad66237..02b97f2309f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java @@ -161,7 +161,10 @@ public class ReplicationSource extends Thread this.conf.getLong("replication.source.size.capacity", 1024*1024*64); this.replicationQueueNbCapacity = this.conf.getInt("replication.source.nb.capacity", 25000); - this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 10); + this.sleepForRetries = + this.conf.getLong("replication.source.sleepforretries", 1000); // 1 second + this.maxRetriesMultiplier = + this.conf.getInt("replication.source.maxretriesmultiplier", 300); // 5 minutes @ 1 sec per this.queue = new PriorityBlockingQueue( this.conf.getInt("hbase.regionserver.maxlogs", 32), @@ -171,8 +174,6 @@ public class ReplicationSource extends Thread this.replicationQueues = replicationQueues; this.replicationPeers = replicationPeers; this.manager = manager; - this.sleepForRetries = - this.conf.getLong("replication.source.sleepforretries", 1000); this.fs = fs; this.metrics = metrics; this.repLogReader = new ReplicationHLogReaderManager(this.fs, this.conf); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java index cb0f6ce68ff..9a095433d91 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java @@ -137,7 +137,8 @@ public class ReplicationSourceManager implements ReplicationListener { this.fs = fs; this.logDir = logDir; this.oldLogDir = oldLogDir; - this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 2000); + this.sleepBeforeFailover = + conf.getLong("replication.sleep.before.failover", 30000); // 30 seconds this.clusterId = clusterId; this.replicationTracker.registerListener(this); this.replicationPeers.getAllPeerIds(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java index 9fe2e0bcc2c..ea524a18896 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java @@ -99,6 +99,8 @@ public class TestReplicationBase { conf1.setLong(HConstants.THREAD_WAKE_FREQUENCY, 100); conf1.setInt("replication.stats.thread.period.seconds", 5); conf1.setBoolean("hbase.tests.use.shortcircuit.reads", false); + conf1.setLong("replication.sleep.before.failover", 2000); + conf1.setInt("replication.source.maxretriesmultiplier", 10); utility1 = new HBaseTestingUtility(conf1); utility1.startMiniZKCluster(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java index 084f8087d60..43b37d01f08 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java @@ -129,6 +129,8 @@ public class TestReplicationSourceManager { ReplicationSourceDummy.class.getCanonicalName()); conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT); + conf.setLong("replication.sleep.before.failover", 2000); + conf.setInt("replication.source.maxretriesmultiplier", 10); utility = new HBaseTestingUtility(conf); utility.startMiniZKCluster();