HBASE-12223 MultiTableInputFormatBase.getSplits is too slow (Yuanbo Peng)
This commit is contained in:
parent
83e4bfaf73
commit
15cf0a6e7b
|
@ -46,6 +46,9 @@ import org.apache.hadoop.mapreduce.JobContext;
|
|||
import org.apache.hadoop.mapreduce.RecordReader;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
* A base for {@link MultiTableInputFormat}s. Receives a list of
|
||||
* {@link Scan} instances that define the input tables and
|
||||
|
@ -129,67 +132,82 @@ public abstract class MultiTableInputFormatBase extends
|
|||
if (scans.isEmpty()) {
|
||||
throw new IOException("No scans were provided.");
|
||||
}
|
||||
List<InputSplit> splits = new ArrayList<InputSplit>();
|
||||
|
||||
Map<TableName, List<Scan>> tableMaps = new HashMap<TableName, List<Scan>>();
|
||||
for (Scan scan : scans) {
|
||||
byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
|
||||
if (tableNameBytes == null)
|
||||
throw new IOException("A scan object did not have a table name");
|
||||
|
||||
TableName tableName = TableName.valueOf(tableNameBytes);
|
||||
|
||||
List<Scan> scanList = tableMaps.get(tableName);
|
||||
if (scanList == null) {
|
||||
scanList = new ArrayList<Scan>();
|
||||
tableMaps.put(tableName, scanList);
|
||||
}
|
||||
scanList.add(scan);
|
||||
}
|
||||
|
||||
List<InputSplit> splits = new ArrayList<InputSplit>();
|
||||
Iterator iter = tableMaps.entrySet().iterator();
|
||||
while (iter.hasNext()) {
|
||||
Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
|
||||
TableName tableName = entry.getKey();
|
||||
List<Scan> scanList = entry.getValue();
|
||||
Table table = null;
|
||||
RegionLocator regionLocator = null;
|
||||
Connection conn = null;
|
||||
try {
|
||||
|
||||
try{
|
||||
conn = ConnectionFactory.createConnection(context.getConfiguration());
|
||||
table = conn.getTable(tableName);
|
||||
regionLocator = conn.getRegionLocator(tableName);
|
||||
regionLocator = (RegionLocator) table;
|
||||
Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
|
||||
if (keys == null || keys.getFirst() == null ||
|
||||
keys.getFirst().length == 0) {
|
||||
throw new IOException("Expecting at least one region for table : "
|
||||
+ tableName.getNameAsString());
|
||||
}
|
||||
int count = 0;
|
||||
|
||||
byte[] startRow = scan.getStartRow();
|
||||
byte[] stopRow = scan.getStopRow();
|
||||
|
||||
RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
|
||||
regionLocator, conn.getAdmin());
|
||||
|
||||
for (int i = 0; i < keys.getFirst().length; i++) {
|
||||
if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
|
||||
continue;
|
||||
regionLocator, conn.getAdmin());
|
||||
Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
|
||||
for (Scan scan : scanList) {
|
||||
if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
|
||||
throw new IOException("Expecting at least one region for table : "
|
||||
+ tableName.getNameAsString());
|
||||
}
|
||||
HRegionLocation hregionLocation = regionLocator.getRegionLocation(
|
||||
keys.getFirst()[i], false);
|
||||
String regionHostname = hregionLocation.getHostname();
|
||||
HRegionInfo regionInfo = hregionLocation.getRegionInfo();
|
||||
int count = 0;
|
||||
|
||||
// determine if the given start and stop keys fall into the range
|
||||
if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
|
||||
Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
|
||||
(stopRow.length == 0 ||
|
||||
Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
|
||||
byte[] splitStart =
|
||||
startRow.length == 0 ||
|
||||
Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
|
||||
.getFirst()[i] : startRow;
|
||||
byte[] splitStop =
|
||||
(stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
|
||||
stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
|
||||
.getSecond()[i] : stopRow;
|
||||
byte[] startRow = scan.getStartRow();
|
||||
byte[] stopRow = scan.getStopRow();
|
||||
|
||||
long regionSize = sizeCalculator.getRegionSize(regionInfo.getRegionName());
|
||||
TableSplit split =
|
||||
new TableSplit(regionLocator.getName(),
|
||||
scan, splitStart, splitStop, regionHostname, regionSize);
|
||||
for (int i = 0; i < keys.getFirst().length; i++) {
|
||||
if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
splits.add(split);
|
||||
if (LOG.isDebugEnabled())
|
||||
LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
|
||||
if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
|
||||
Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
|
||||
(stopRow.length == 0 || Bytes.compareTo(stopRow,
|
||||
keys.getFirst()[i]) > 0)) {
|
||||
byte[] splitStart = startRow.length == 0 ||
|
||||
Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
|
||||
keys.getFirst()[i] : startRow;
|
||||
byte[] splitStop = (stopRow.length == 0 ||
|
||||
Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
|
||||
keys.getSecond()[i].length > 0 ?
|
||||
keys.getSecond()[i] : stopRow;
|
||||
|
||||
HRegionLocation hregionLocation = regionLocator.getRegionLocation(
|
||||
keys.getFirst()[i], false);
|
||||
String regionHostname = hregionLocation.getHostname();
|
||||
HRegionInfo regionInfo = hregionLocation.getRegionInfo();
|
||||
long regionSize = sizeCalculator.getRegionSize(
|
||||
regionInfo.getRegionName());
|
||||
|
||||
TableSplit split = new TableSplit(table.getName(),
|
||||
scan, splitStart, splitStop, regionHostname, regionSize);
|
||||
|
||||
splits.add(split);
|
||||
|
||||
if (LOG.isDebugEnabled())
|
||||
LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
@ -198,6 +216,7 @@ public abstract class MultiTableInputFormatBase extends
|
|||
if (null != conn) conn.close();
|
||||
}
|
||||
}
|
||||
|
||||
return splits;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue