HBASE-19290 Reduce zk request when doing split log
This commit is contained in:
parent
b4a4be65ea
commit
8b32d37929
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
package org.apache.hadoop.hbase.coordination;
|
package org.apache.hadoop.hbase.coordination;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.LongAdder;
|
import java.util.concurrent.atomic.LongAdder;
|
||||||
|
@ -199,27 +200,28 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
* try to grab a 'lock' on the task zk node to own and execute the task.
|
* try to grab a 'lock' on the task zk node to own and execute the task.
|
||||||
* <p>
|
* <p>
|
||||||
* @param path zk node for the task
|
* @param path zk node for the task
|
||||||
|
* @return boolean value when grab a task success return true otherwise false
|
||||||
*/
|
*/
|
||||||
private void grabTask(String path) {
|
private boolean grabTask(String path) {
|
||||||
Stat stat = new Stat();
|
Stat stat = new Stat();
|
||||||
byte[] data;
|
byte[] data;
|
||||||
synchronized (grabTaskLock) {
|
synchronized (grabTaskLock) {
|
||||||
currentTask = path;
|
currentTask = path;
|
||||||
workerInGrabTask = true;
|
workerInGrabTask = true;
|
||||||
if (Thread.interrupted()) {
|
if (Thread.interrupted()) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
try {
|
try {
|
||||||
if ((data = ZKUtil.getDataNoWatch(watcher, path, stat)) == null) {
|
if ((data = ZKUtil.getDataNoWatch(watcher, path, stat)) == null) {
|
||||||
SplitLogCounters.tot_wkr_failed_to_grab_task_no_data.increment();
|
SplitLogCounters.tot_wkr_failed_to_grab_task_no_data.increment();
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.warn("Failed to get data for znode " + path, e);
|
LOG.warn("Failed to get data for znode " + path, e);
|
||||||
SplitLogCounters.tot_wkr_failed_to_grab_task_exception.increment();
|
SplitLogCounters.tot_wkr_failed_to_grab_task_exception.increment();
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
SplitLogTask slt;
|
SplitLogTask slt;
|
||||||
try {
|
try {
|
||||||
|
@ -227,18 +229,18 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
} catch (DeserializationException e) {
|
} catch (DeserializationException e) {
|
||||||
LOG.warn("Failed parse data for znode " + path, e);
|
LOG.warn("Failed parse data for znode " + path, e);
|
||||||
SplitLogCounters.tot_wkr_failed_to_grab_task_exception.increment();
|
SplitLogCounters.tot_wkr_failed_to_grab_task_exception.increment();
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
if (!slt.isUnassigned()) {
|
if (!slt.isUnassigned()) {
|
||||||
SplitLogCounters.tot_wkr_failed_to_grab_task_owned.increment();
|
SplitLogCounters.tot_wkr_failed_to_grab_task_owned.increment();
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
currentVersion =
|
currentVersion =
|
||||||
attemptToOwnTask(true, watcher, server.getServerName(), path, stat.getVersion());
|
attemptToOwnTask(true, watcher, server.getServerName(), path, stat.getVersion());
|
||||||
if (currentVersion < 0) {
|
if (currentVersion < 0) {
|
||||||
SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.increment();
|
SplitLogCounters.tot_wkr_failed_to_grab_task_lost_race.increment();
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZKSplitLog.isRescanNode(watcher, currentTask)) {
|
if (ZKSplitLog.isRescanNode(watcher, currentTask)) {
|
||||||
|
@ -249,7 +251,7 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
|
|
||||||
endTask(new SplitLogTask.Done(server.getServerName()),
|
endTask(new SplitLogTask.Done(server.getServerName()),
|
||||||
SplitLogCounters.tot_wkr_task_acquired_rescan, splitTaskDetails);
|
SplitLogCounters.tot_wkr_task_acquired_rescan, splitTaskDetails);
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("worker " + server.getServerName() + " acquired task " + path);
|
LOG.info("worker " + server.getServerName() + " acquired task " + path);
|
||||||
|
@ -266,6 +268,7 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
LOG.warn("Interrupted while yielding for other region servers", e);
|
LOG.warn("Interrupted while yielding for other region servers", e);
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
} finally {
|
} finally {
|
||||||
synchronized (grabTaskLock) {
|
synchronized (grabTaskLock) {
|
||||||
workerInGrabTask = false;
|
workerInGrabTask = false;
|
||||||
|
@ -316,12 +319,13 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function calculates how many splitters it could create based on expected average tasks per
|
* This function calculates how many splitters this RS should create based on expected average
|
||||||
* RS and the hard limit upper bound(maxConcurrentTasks) set by configuration. <br>
|
* tasks per RS and the hard limit upper bound(maxConcurrentTasks) set by configuration. <br>
|
||||||
* At any given time, a RS allows spawn MIN(Expected Tasks/RS, Hard Upper Bound)
|
* At any given time, a RS allows spawn MIN(Expected Tasks/RS, Hard Upper Bound)
|
||||||
* @param numTasks current total number of available tasks
|
* @param numTasks total number of split tasks available
|
||||||
|
* @return number of tasks this RS can grab
|
||||||
*/
|
*/
|
||||||
private int calculateAvailableSplitters(int numTasks) {
|
private int getNumExpectedTasksPerRS(int numTasks) {
|
||||||
// at lease one RS(itself) available
|
// at lease one RS(itself) available
|
||||||
int availableRSs = 1;
|
int availableRSs = 1;
|
||||||
try {
|
try {
|
||||||
|
@ -332,12 +336,17 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
// do nothing
|
// do nothing
|
||||||
LOG.debug("getAvailableRegionServers got ZooKeeper exception", e);
|
LOG.debug("getAvailableRegionServers got ZooKeeper exception", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
int expectedTasksPerRS = (numTasks / availableRSs) + ((numTasks % availableRSs == 0) ? 0 : 1);
|
int expectedTasksPerRS = (numTasks / availableRSs) + ((numTasks % availableRSs == 0) ? 0 : 1);
|
||||||
expectedTasksPerRS = Math.max(1, expectedTasksPerRS); // at least be one
|
return Math.max(1, expectedTasksPerRS); // at least be one
|
||||||
// calculate how many more splitters we could spawn
|
}
|
||||||
return Math.min(expectedTasksPerRS, maxConcurrentTasks)
|
|
||||||
- this.tasksInProgress.get();
|
/**
|
||||||
|
* @param expectedTasksPerRS Average number of tasks to be handled by each RS
|
||||||
|
* @return true if more splitters are available, otherwise false.
|
||||||
|
*/
|
||||||
|
private boolean areSplittersAvailable(int expectedTasksPerRS) {
|
||||||
|
return (Math.min(expectedTasksPerRS, maxConcurrentTasks)
|
||||||
|
- this.tasksInProgress.get()) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -406,8 +415,11 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
+ " ... worker thread exiting.");
|
+ " ... worker thread exiting.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// shuffle the paths to prevent different split log worker start from the same log file after
|
||||||
|
// meta log (if any)
|
||||||
|
Collections.shuffle(paths);
|
||||||
// pick meta wal firstly
|
// pick meta wal firstly
|
||||||
int offset = (int) (Math.random() * paths.size());
|
int offset = 0;
|
||||||
for (int i = 0; i < paths.size(); i++) {
|
for (int i = 0; i < paths.size(); i++) {
|
||||||
if (AbstractFSWALProvider.isMetaFile(paths.get(i))) {
|
if (AbstractFSWALProvider.isMetaFile(paths.get(i))) {
|
||||||
offset = i;
|
offset = i;
|
||||||
|
@ -415,21 +427,33 @@ public class ZkSplitLogWorkerCoordination extends ZKListener implements
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int numTasks = paths.size();
|
int numTasks = paths.size();
|
||||||
|
int expectedTasksPerRS = getNumExpectedTasksPerRS(numTasks);
|
||||||
|
boolean taskGrabbed = false;
|
||||||
for (int i = 0; i < numTasks; i++) {
|
for (int i = 0; i < numTasks; i++) {
|
||||||
|
while (!shouldStop) {
|
||||||
|
if (this.areSplittersAvailable(expectedTasksPerRS)) {
|
||||||
|
LOG.debug("Current region server " + server.getServerName()
|
||||||
|
+ " is ready to take more tasks, will get task list and try grab tasks again.");
|
||||||
int idx = (i + offset) % paths.size();
|
int idx = (i + offset) % paths.size();
|
||||||
// don't call ZKSplitLog.getNodeName() because that will lead to
|
// don't call ZKSplitLog.getNodeName() because that will lead to
|
||||||
// double encoding of the path name
|
// double encoding of the path name
|
||||||
if (this.calculateAvailableSplitters(numTasks) > 0) {
|
taskGrabbed |= grabTask(ZNodePaths.joinZNode(
|
||||||
grabTask(ZNodePaths.joinZNode(watcher.znodePaths.splitLogZNode, paths.get(idx)));
|
watcher.znodePaths.splitLogZNode, paths.get(idx)));
|
||||||
|
break;
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("Current region server " + server.getServerName() + " has "
|
LOG.debug("Current region server " + server.getServerName() + " has "
|
||||||
+ this.tasksInProgress.get() + " tasks in progress and can't take more.");
|
+ this.tasksInProgress.get() + " tasks in progress and can't take more.");
|
||||||
break;
|
Thread.sleep(100);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (shouldStop) {
|
if (shouldStop) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!taskGrabbed && !shouldStop) {
|
||||||
|
// do not grab any tasks, sleep a little bit to reduce zk request.
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
SplitLogCounters.tot_wkr_task_grabing.increment();
|
SplitLogCounters.tot_wkr_task_grabing.increment();
|
||||||
synchronized (taskReadySeq) {
|
synchronized (taskReadySeq) {
|
||||||
while (seq_start == taskReadySeq.get()) {
|
while (seq_start == taskReadySeq.get()) {
|
||||||
|
|
Loading…
Reference in New Issue