HBASE-5892 [hbck] Refactor parallel WorkItem* to Futures (Andrew Wang)

The WorkItem* helper classes in HBaseFsck move from Runnable, with hand-rolled done-flag/wait()/notifyAll() completion tracking, to Callable<Void> submitted in batches through ExecutorService.invokeAll(); task failures now surface through Future.get() as ExecutionException.

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1345890 13f79535-47bb-0310-9956-ffa450edef68

commit b737e2e8bd
parent 57a542e685

Modified: org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -32,10 +32,11 @@ import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.atomic.AtomicInteger;

 import org.apache.commons.logging.Log;
@@ -157,7 +158,7 @@ public class HBaseFsck {
   private HConnection connection;
   private HBaseAdmin admin;
   private HTable meta;
-  private ThreadPoolExecutor executor; // threads to retrieve data from regionservers
+  private ScheduledThreadPoolExecutor executor; // threads to retrieve data from regionservers
   private long startMillis = System.currentTimeMillis();

   /***********
@@ -223,10 +224,7 @@ public class HBaseFsck {
     this.conf = conf;

     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
-    executor = new ThreadPoolExecutor(numThreads, numThreads,
-        THREADS_KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
-        new LinkedBlockingQueue<Runnable>());
-    executor.allowCoreThreadTimeOut(true);
+    executor = new ScheduledThreadPoolExecutor(numThreads);
   }

   /**
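Note: the hunks above swap the explicitly configured ThreadPoolExecutor for a ScheduledThreadPoolExecutor. Since ScheduledThreadPoolExecutor pins the pool at its core size and manages its own internal work queue, the keep-alive, queue, and allowCoreThreadTimeOut() plumbing can be dropped; like any ExecutorService, it supports the invokeAll() used by the call sites below. A minimal standalone sketch of the two constructions (pool size 8 is an arbitrary example value, not from the patch):

    import java.util.concurrent.LinkedBlockingQueue;
    import java.util.concurrent.ScheduledThreadPoolExecutor;
    import java.util.concurrent.ThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;

    public class PoolSetup {
      public static void main(String[] args) {
        // Before: fixed-size pool with explicit keep-alive and queue.
        ThreadPoolExecutor before = new ThreadPoolExecutor(8, 8,
            60, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
        before.allowCoreThreadTimeOut(true);

        // After: only the core pool size needs to be specified.
        ScheduledThreadPoolExecutor after = new ScheduledThreadPoolExecutor(8);

        before.shutdown();
        after.shutdown();
      }
    }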
@@ -627,20 +625,25 @@ public class HBaseFsck {
     Collection<HbckInfo> hbckInfos = regionInfoMap.values();

     // Parallelized read of .regioninfo files.
-    WorkItemHdfsRegionInfo[] hbis = new WorkItemHdfsRegionInfo[hbckInfos.size()];
-    int num = 0;
+    List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
+    List<Future<Void>> hbiFutures;

     for (HbckInfo hbi : hbckInfos) {
-      hbis[num] = new WorkItemHdfsRegionInfo(hbi, this, errors);
-      executor.execute(hbis[num]);
-      num++;
+      WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
+      hbis.add(work);
     }

-    for (int i=0; i < num; i++) {
-      WorkItemHdfsRegionInfo hbi = hbis[i];
-      synchronized(hbi) {
-        while (!hbi.isDone()) {
-          hbi.wait();
-        }
+    // Submit and wait for completion
+    hbiFutures = executor.invokeAll(hbis);
+
+    for(int i=0; i<hbiFutures.size(); i++) {
+      WorkItemHdfsRegionInfo work = hbis.get(i);
+      Future<Void> f = hbiFutures.get(i);
+      try {
+        f.get();
+      } catch(ExecutionException e) {
+        LOG.warn("Failed to read .regioninfo file for region " +
+              work.hbi.getRegionNameAsString(), e.getCause());
       }
     }
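This hunk establishes the pattern the two hunks below repeat: collect the work items into a List of Callable<Void>, submit the whole batch with ExecutorService.invokeAll() (which blocks until every task has finished), then call get() on each Future so that a task's exception, delivered wrapped in an ExecutionException, can be logged via getCause(). A self-contained sketch of that pattern, with illustrative names (FlakyTask is not an HBase class):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public class InvokeAllDemo {
      static class FlakyTask implements Callable<Void> {
        private final int id;
        FlakyTask(int id) { this.id = id; }

        @Override
        public Void call() throws IOException {
          if (id % 3 == 0) {
            throw new IOException("task " + id + " failed");
          }
          return null;
        }
      }

      public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        List<FlakyTask> tasks = new ArrayList<FlakyTask>();
        for (int i = 0; i < 6; i++) {
          tasks.add(new FlakyTask(i));
        }

        // Blocks until all six tasks are done, failed or not.
        List<Future<Void>> futures = executor.invokeAll(tasks);

        for (int i = 0; i < futures.size(); i++) {
          try {
            futures.get(i).get(); // re-throws a task failure...
          } catch (ExecutionException e) {
            // ...wrapped in ExecutionException; getCause() is the IOException.
            System.out.println("task " + i + ": " + e.getCause());
          }
        }
        executor.shutdown();
      }
    }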
@@ -1052,22 +1055,22 @@ public class HBaseFsck {
     }

     // level 1: <HBASE_DIR>/*
-    WorkItemHdfsDir[] dirs = new WorkItemHdfsDir[tableDirs.size()];
-    int num = 0;
+    List<WorkItemHdfsDir> dirs = new ArrayList<WorkItemHdfsDir>(tableDirs.size());
+    List<Future<Void>> dirsFutures;

     for (FileStatus tableDir : tableDirs) {
       LOG.debug("Loading region dirs from " +tableDir.getPath());
-      dirs[num] = new WorkItemHdfsDir(this, fs, errors, tableDir);
-      executor.execute(dirs[num]);
-      num++;
+      dirs.add(new WorkItemHdfsDir(this, fs, errors, tableDir));
     }

-    // wait for all directories to be done
-    for (int i = 0; i < num; i++) {
-      WorkItemHdfsDir dir = dirs[i];
-      synchronized (dir) {
-        while (!dir.isDone()) {
-          dir.wait();
-        }
+    // Invoke and wait for Callables to complete
+    dirsFutures = executor.invokeAll(dirs);
+
+    for(Future<Void> f: dirsFutures) {
+      try {
+        f.get();
+      } catch(ExecutionException e) {
+        LOG.warn("Could not load region dir " , e.getCause());
       }
     }
   }
@@ -1137,22 +1140,24 @@ public class HBaseFsck {
   void processRegionServers(Collection<ServerName> regionServerList)
     throws IOException, InterruptedException {

-    WorkItemRegion[] work = new WorkItemRegion[regionServerList.size()];
-    int num = 0;
+    List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
+    List<Future<Void>> workFutures;

     // loop to contact each region server in parallel
     for (ServerName rsinfo: regionServerList) {
-      work[num] = new WorkItemRegion(this, rsinfo, errors, connection);
-      executor.execute(work[num]);
-      num++;
+      workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
     }

-    // wait for all submitted tasks to be done
-    for (int i = 0; i < num; i++) {
-      synchronized (work[i]) {
-        while (!work[i].isDone()) {
-          work[i].wait();
-        }
+    workFutures = executor.invokeAll(workItems);
+
+    for(int i=0; i<workFutures.size(); i++) {
+      WorkItemRegion item = workItems.get(i);
+      Future<Void> f = workFutures.get(i);
+      try {
+        f.get();
+      } catch(ExecutionException e) {
+        LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
+            e.getCause());
       }
     }
   }
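Both converted call sites block in invokeAll(), which throws InterruptedException; processRegionServers() already declares it, so the signature is unchanged. As an aside not used by this patch, invokeAll() also has a timed overload that cancels whatever has not finished by the deadline; get() on such a future throws CancellationException. A sketch:

    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.CancellationException;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;

    public class TimedInvokeAll {
      public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(2);
        Callable<Void> fast = new Callable<Void>() {
          @Override
          public Void call() { return null; }
        };
        Callable<Void> slow = new Callable<Void>() {
          @Override
          public Void call() throws InterruptedException {
            Thread.sleep(10000);
            return null;
          }
        };

        // Tasks still running at the one-second deadline are cancelled.
        List<Future<Void>> futures =
            executor.invokeAll(Arrays.asList(fast, slow), 1, TimeUnit.SECONDS);

        for (Future<Void> f : futures) {
          try {
            f.get();
            System.out.println("completed");
          } catch (CancellationException e) {
            System.out.println("cancelled at the deadline");
          } catch (ExecutionException e) {
            System.out.println("failed: " + e.getCause());
          }
        }
        executor.shutdown();
      }
    }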
@@ -2367,10 +2372,11 @@ public class HBaseFsck {
       if (metaEntry != null) {
         return metaEntry.getRegionNameAsString();
       } else if (hdfsEntry != null) {
-        return hdfsEntry.hri.getRegionNameAsString();
-      } else {
-        return null;
+        if (hdfsEntry.hri != null) {
+          return hdfsEntry.hri.getRegionNameAsString();
+        }
       }
+      return null;
     }

     public byte[] getRegionName() {
@@ -2619,12 +2625,11 @@ public class HBaseFsck {
   /**
    * Contact a region server and get all information from it
    */
-  static class WorkItemRegion implements Runnable {
+  static class WorkItemRegion implements Callable<Void> {
     private HBaseFsck hbck;
     private ServerName rsinfo;
     private ErrorReporter errors;
     private HConnection connection;
-    private boolean done;

     WorkItemRegion(HBaseFsck hbck, ServerName info,
                    ErrorReporter errors, HConnection connection) {
@@ -2632,16 +2637,10 @@ public class HBaseFsck {
       this.rsinfo = info;
       this.errors = errors;
       this.connection = connection;
-      this.done = false;
-    }
-
-    // is this task done?
-    synchronized boolean isDone() {
-      return done;
     }

     @Override
-    public synchronized void run() {
+    public synchronized Void call() throws IOException {
       errors.progress();
       try {
         AdminProtocol server =
@@ -2672,10 +2671,9 @@ public class HBaseFsck {
       } catch (IOException e) { // unable to connect to the region server.
         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
           " Unable to fetch region information. " + e);
-      } finally {
-        done = true;
-        notifyAll(); // wakeup anybody waiting for this item to be done
+        throw e;
       }
+      return null;
     }

     private List<HRegionInfo> filterOnlyMetaRegions(List<HRegionInfo> regions) {
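The three hunks above show the shape of the Runnable-to-Callable conversion that the next two work item classes also follow: the done flag, isDone(), and the finally { done = true; notifyAll(); } block all disappear, because completion is now observed through the Future that invokeAll() returns, and the IOException is rethrown after being reported so the caller can tell failed items apart via ExecutionException. A minimal before/after sketch (the WorkItem classes here are stand-ins, not the HBase ones):

    import java.io.IOException;
    import java.util.concurrent.Callable;

    // Before: a Runnable that hand-rolls completion signalling, since
    // run() can neither return a value nor throw a checked exception.
    class OldWorkItem implements Runnable {
      private boolean done = false;

      synchronized boolean isDone() {
        return done;
      }

      @Override
      public synchronized void run() {
        try {
          // ... do the work, swallowing any checked exception ...
        } finally {
          done = true;
          notifyAll(); // wake the thread blocked in wait()
        }
      }
    }

    // After: completion and failure both travel through the Future.
    class NewWorkItem implements Callable<Void> {
      @Override
      public Void call() throws IOException {
        // ... do the work; a thrown IOException reaches the caller
        // as ExecutionException.getCause() ...
        return null;
      }
    }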
@@ -2693,12 +2691,11 @@ public class HBaseFsck {
    * Contact hdfs and get all information about specified table directory into
    * regioninfo list.
    */
-  static class WorkItemHdfsDir implements Runnable {
+  static class WorkItemHdfsDir implements Callable<Void> {
     private HBaseFsck hbck;
     private FileStatus tableDir;
     private ErrorReporter errors;
     private FileSystem fs;
-    private boolean done;

     WorkItemHdfsDir(HBaseFsck hbck, FileSystem fs, ErrorReporter errors,
                     FileStatus status) {
@@ -2706,27 +2703,25 @@ public class HBaseFsck {
       this.fs = fs;
       this.tableDir = status;
       this.errors = errors;
-      this.done = false;
     }

-    synchronized boolean isDone() {
-      return done;
-    }
-
     @Override
-    public synchronized void run() {
+    public synchronized Void call() throws IOException {
       try {
         String tableName = tableDir.getPath().getName();
         // ignore hidden files
         if (tableName.startsWith(".") &&
-            !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME)))
-          return;
+            !tableName.equals( Bytes.toString(HConstants.META_TABLE_NAME))) {
+          return null;
+        }
         // level 2: <HBASE_DIR>/<table>/*
         FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
         for (FileStatus regionDir : regionDirs) {
           String encodedName = regionDir.getPath().getName();
           // ignore directories that aren't hexadecimal
-          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) continue;
+          if (!encodedName.toLowerCase().matches("[0-9a-f]+")) {
+            continue;
+          }

           LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
           HbckInfo hbi = hbck.getOrCreateInfo(encodedName);
@@ -2763,10 +2758,9 @@ public class HBaseFsck {
         errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
             + tableDir.getPath().getName()
             + " Unable to fetch region information. " + e);
-      } finally {
-        done = true;
-        notifyAll();
+        throw e;
       }
+      return null;
     }
   }

@@ -2774,51 +2768,41 @@ public class HBaseFsck {
    * Contact hdfs and get all information about specified table directory into
    * regioninfo list.
    */
-  static class WorkItemHdfsRegionInfo implements Runnable {
+  static class WorkItemHdfsRegionInfo implements Callable<Void> {
     private HbckInfo hbi;
     private HBaseFsck hbck;
     private ErrorReporter errors;
-    private boolean done;

     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
       this.hbi = hbi;
       this.hbck = hbck;
       this.errors = errors;
-      this.done = false;
-    }
-
-    synchronized boolean isDone() {
-      return done;
     }

     @Override
-    public synchronized void run() {
-      try {
-        // only load entries that haven't been loaded yet.
-        if (hbi.getHdfsHRI() == null) {
+    public synchronized Void call() throws IOException {
+      // only load entries that haven't been loaded yet.
+      if (hbi.getHdfsHRI() == null) {
+        try {
+          hbck.loadHdfsRegioninfo(hbi);
+        } catch (IOException ioe) {
+          String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
+            + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
+            + hbi.getHdfsRegionDir()
+            + "! It may be an invalid format or version file. Treating as "
+            + "an orphaned regiondir.";
+          errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
           try {
-            hbck.loadHdfsRegioninfo(hbi);
-          } catch (IOException ioe) {
-            String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
-              + Bytes.toString(hbi.getTableName()) + " in hdfs dir "
-              + hbi.getHdfsRegionDir()
-              + "! It may be an invalid format or version file. Treating as "
-              + "an orphaned regiondir.";
-            errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
-            try {
-              hbck.debugLsr(hbi.getHdfsRegionDir());
-            } catch (IOException ioe2) {
-              LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
-              return; // TODO convert this in to a future
-            }
-            hbck.orphanHdfsDirs.add(hbi);
-            return;
+            hbck.debugLsr(hbi.getHdfsRegionDir());
+          } catch (IOException ioe2) {
+            LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
+            throw ioe2;
           }
+          hbck.orphanHdfsDirs.add(hbi);
+          throw ioe;
         }
-      } finally {
-        done = true;
-        notifyAll();
       }
+      return null;
     }
   };

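Taken together, the patch replaces a hand-written completion barrier (per-item done flags polled under synchronized/wait/notifyAll) with the standard one provided by ExecutorService. A toy end-to-end version of the refactored flow, using the same ScheduledThreadPoolExecutor-plus-invokeAll structure as the patch (LoadRegionInfo and the "orphan" naming convention are illustrative only, not HBase code):

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Future;
    import java.util.concurrent.ScheduledThreadPoolExecutor;

    public class MiniFsck {
      // Stand-in for a WorkItem*: "loads" one region, failing on orphans.
      static class LoadRegionInfo implements Callable<Void> {
        final String region;
        LoadRegionInfo(String region) { this.region = region; }

        @Override
        public Void call() throws IOException {
          if (region.startsWith("orphan")) {
            // Report locally, then rethrow so the Future records the
            // failure: the "reportError(...); throw ioe;" shape above.
            System.err.println("orphan region: " + region);
            throw new IOException("no .regioninfo for " + region);
          }
          return null;
        }
      }

      public static void main(String[] args) throws InterruptedException {
        ScheduledThreadPoolExecutor executor = new ScheduledThreadPoolExecutor(4);
        List<LoadRegionInfo> work = new ArrayList<LoadRegionInfo>();
        for (String r : new String[] {"r1", "orphan-r2", "r3"}) {
          work.add(new LoadRegionInfo(r));
        }

        List<Future<Void>> futures = executor.invokeAll(work);
        for (int i = 0; i < futures.size(); i++) {
          try {
            futures.get(i).get();
          } catch (ExecutionException e) {
            System.err.println(work.get(i).region + " failed: " + e.getCause());
          }
        }
        executor.shutdown();
      }
    }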