Revert "HBASE-18814 Improve TableSnapshotInputFormat to allow more multiple mappers per region" due to wrong jira id.
This reverts commit 55987efdcf
.
This commit is contained in:
parent
4579bba486
commit
c256532ce3
|
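For context: the change being reverted below had added overloads of initTableSnapshotMapperJob / initTableSnapshotMapJob and TableSnapshotInputFormat.setInput that accept a RegionSplitter.SplitAlgorithm plus a splits-per-region count, so a snapshot-reading MapReduce job could run several mappers per region instead of one. The sketch below is illustrative only and is not part of this commit: the job name, snapshot name, restore directory, and mapper are hypothetical, and the ten-argument overload it calls compiles only against the pre-revert API; after the revert only the shorter overload (one InputSplit per region) remains.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class SnapshotScanJob {

  // Hypothetical mapper: emits one record (the row key) per row read from the snapshot.
  static class RowKeyMapper extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      context.write(key, NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "snapshot-scan");
    job.setJarByClass(SnapshotScanJob.class);
    // Restore dir must be writable by the current user and must not live under hbase.rootdir.
    Path restoreDir = new Path("/tmp/snapshot-restore");
    Scan scan = new Scan();

    // Pre-revert overload: UniformSplit with 8 splits per region asks for up to
    // 8 InputSplits (mappers) per region of the snapshot.
    TableMapReduceUtil.initTableSnapshotMapperJob("my_snapshot", scan, RowKeyMapper.class,
        ImmutableBytesWritable.class, NullWritable.class, job, true, restoreDir,
        new RegionSplitter.UniformSplit(), 8);

    // Post-revert equivalent: only the shorter overload exists, one InputSplit per region.
    // TableMapReduceUtil.initTableSnapshotMapperJob("my_snapshot", scan, RowKeyMapper.class,
    //     ImmutableBytesWritable.class, NullWritable.class, job, true, restoreDir);

    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}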
@@ -151,7 +151,7 @@ public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase
       int expectedNumSplits = numRegions > 2 ? numRegions - 2 : numRegions;

       org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat.doTestWithMapReduce(util,
-        tableName, snapshotName, START_ROW, END_ROW, tableDir, numRegions, 1,
+        tableName, snapshotName, START_ROW, END_ROW, tableDir, numRegions,
         expectedNumSplits, false);
     } else if (mr.equalsIgnoreCase(MAPRED_IMPLEMENTATION)) {
       /*
@@ -165,7 +165,7 @@ public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase
       int expectedNumSplits = numRegions;

       org.apache.hadoop.hbase.mapred.TestTableSnapshotInputFormat.doTestWithMapReduce(util,
-        tableName, snapshotName, MAPRED_START_ROW, MAPRED_END_ROW, tableDir, numRegions, 1,
+        tableName, snapshotName, MAPRED_START_ROW, MAPRED_END_ROW, tableDir, numRegions,
         expectedNumSplits, false);
     } else {
       throw new IllegalArgumentException("Unrecognized mapreduce implementation: " + mr +".");
@@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.mapreduce.ResultSerialization;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.security.token.TokenUtil;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
@@ -186,43 +185,6 @@ public class TableMapReduceUtil {
     org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job);
   }

-  /**
-   * Sets up the job for reading from a table snapshot. It bypasses hbase servers
-   * and read directly from snapshot files.
-   *
-   * @param snapshotName The name of the snapshot (of a table) to read from.
-   * @param columns The columns to scan.
-   * @param mapper The mapper class to use.
-   * @param outputKeyClass The class of the output key.
-   * @param outputValueClass The class of the output value.
-   * @param jobConf The current job to adjust. Make sure the passed job is
-   * carrying all necessary HBase configuration.
-   * @param addDependencyJars upload HBase jars and jars for any of the configured
-   *           job classes via the distributed cache (tmpjars).
-   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restore directory can be deleted.
-   * @param splitAlgo algorithm to split
-   * @param numSplitsPerRegion how many input splits to generate per one region
-   * @throws IOException When setting up the details fails.
-   * @see TableSnapshotInputFormat
-   */
-  public static void initTableSnapshotMapJob(String snapshotName, String columns,
-      Class<? extends TableMap> mapper,
-      Class<?> outputKeyClass,
-      Class<?> outputValueClass, JobConf jobConf,
-      boolean addDependencyJars, Path tmpRestoreDir,
-      RegionSplitter.SplitAlgorithm splitAlgo,
-      int numSplitsPerRegion)
-      throws IOException {
-    TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo,
-      numSplitsPerRegion);
-    initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf,
-      addDependencyJars, TableSnapshotInputFormat.class);
-    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(jobConf);
-  }
-
-
   /**
    * Use this before submitting a TableReduce job. It will
    * appropriately set up the JobConf.
@@ -26,13 +26,11 @@ import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapreduce.Job;

 import java.io.DataInput;
 import java.io.DataOutput;
@@ -165,20 +163,4 @@ public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWrita
       throws IOException {
     TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
   }
-
-  /**
-   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
-   * @param job the job to configure
-   * @param snapshotName the name of the snapshot to read from
-   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restoreDir can be deleted.
-   * @param splitAlgo split algorithm to generate splits from region
-   * @param numSplitsPerRegion how many input splits to generate per one region
-   * @throws IOException if an error occurs
-   */
-  public static void setInput(JobConf job, String snapshotName, Path restoreDir,
-      RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
-    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo, numSplitsPerRegion);
-  }
 }
@@ -55,7 +55,6 @@ import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.security.token.TokenUtil;
 import org.apache.hadoop.hbase.util.Base64;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.hbase.zookeeper.ZKConfig;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.InputFormat;
@@ -376,43 +375,6 @@ public class TableMapReduceUtil {
     resetCacheConfig(job.getConfiguration());
   }

-  /**
-   * Sets up the job for reading from a table snapshot. It bypasses hbase servers
-   * and read directly from snapshot files.
-   *
-   * @param snapshotName The name of the snapshot (of a table) to read from.
-   * @param scan The scan instance with the columns, time range etc.
-   * @param mapper The mapper class to use.
-   * @param outputKeyClass The class of the output key.
-   * @param outputValueClass The class of the output value.
-   * @param job The current job to adjust. Make sure the passed job is
-   * carrying all necessary HBase configuration.
-   * @param addDependencyJars upload HBase jars and jars for any of the configured
-   *           job classes via the distributed cache (tmpjars).
-   *
-   * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restore directory can be deleted.
-   * @param splitAlgo algorithm to split
-   * @param numSplitsPerRegion how many input splits to generate per one region
-   * @throws IOException When setting up the details fails.
-   * @see TableSnapshotInputFormat
-   */
-  public static void initTableSnapshotMapperJob(String snapshotName, Scan scan,
-      Class<? extends TableMapper> mapper,
-      Class<?> outputKeyClass,
-      Class<?> outputValueClass, Job job,
-      boolean addDependencyJars, Path tmpRestoreDir,
-      RegionSplitter.SplitAlgorithm splitAlgo,
-      int numSplitsPerRegion)
-      throws IOException {
-    TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir, splitAlgo,
-      numSplitsPerRegion);
-    initTableMapperJob(snapshotName, scan, mapper, outputKeyClass,
-      outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class);
-    resetCacheConfig(job.getConfiguration());
-  }
-
   /**
    * Use this before submitting a Multi TableMap job. It will appropriately set
    * up the job.
@@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -67,10 +66,8 @@ import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTe
 * }
 * </pre>
 * <p>
- * Internally, this input format restores the snapshot into the given tmp directory. By default,
- * and similar to {@link TableInputFormat} an InputSplit is created per region, but optionally you
- * can run N mapper tasks per every region, in which case the region key range will be split to
- * N sub-ranges and an InputSplit will be created per sub-range. The region is opened for reading
+ * Internally, this input format restores the snapshot into the given tmp directory. Similar to
+ * {@link TableInputFormat} an InputSplit is created per region. The region is opened for reading
 * from each RecordReader. An internal RegionScanner is used to execute the
 * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user.
 * <p>
@@ -219,21 +216,4 @@ public class TableSnapshotInputFormat extends InputFormat<ImmutableBytesWritable
       throws IOException {
     TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir);
   }
-
-  /**
-   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
-   * @param job the job to configure
-   * @param snapshotName the name of the snapshot to read from
-   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restoreDir can be deleted.
-   * @param splitAlgo split algorithm to generate splits from region
-   * @param numSplitsPerRegion how many input splits to generate per one region
-   * @throws IOException if an error occurs
-   */
-  public static void setInput(Job job, String snapshotName, Path restoreDir,
-      RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
-    TableSnapshotInputFormatImpl.setInput(job.getConfiguration(), snapshotName, restoreDir,
-      splitAlgo, numSplitsPerRegion);
-  }
 }
@@ -45,7 +45,6 @@ import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.io.Writable;

 import java.io.ByteArrayOutputStream;
@@ -75,17 +74,6 @@ public class TableSnapshotInputFormatImpl {
       "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
   private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

-  /**
-   * For MapReduce jobs running multiple mappers per region, determines
-   * what split algorithm we should be using to find split points for scanners.
-   */
-  public static final String SPLIT_ALGO = "hbase.mapreduce.split.algorithm";
-  /**
-   * For MapReduce jobs running multiple mappers per region, determines
-   * number of splits to generate per region.
-   */
-  public static final String NUM_SPLITS_PER_REGION = "hbase.mapreduce.splits.per.region";
-
   /**
    * Implementation class for InputSplit logic common between mapred and mapreduce.
    */
@@ -273,33 +261,9 @@ public class TableSnapshotInputFormatImpl {
     // the temp dir where the snapshot is restored
     Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));

-    RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf);
-
-    int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1);
-
-    return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
-  }
-
-  public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException{
-    String splitAlgoClassName = conf.get(SPLIT_ALGO);
-    if (splitAlgoClassName == null)
-      return null;
-    try {
-      return ((Class<? extends RegionSplitter.SplitAlgorithm>)
-          Class.forName(splitAlgoClassName)).newInstance();
-    } catch (ClassNotFoundException e) {
-      throw new IOException("SplitAlgo class " + splitAlgoClassName +
-          " is not found", e);
-    } catch (InstantiationException e) {
-      throw new IOException("SplitAlgo class " + splitAlgoClassName +
-          " is not instantiable", e);
-    } catch (IllegalAccessException e) {
-      throw new IOException("SplitAlgo class " + splitAlgoClassName +
-          " is not instantiable", e);
-    }
-  }
-
+    return getSplits(scan, manifest, regionInfos, restoreDir, conf);
   }

   public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
     List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
     if (regionManifests == null) {
@@ -344,12 +308,6 @@ public class TableSnapshotInputFormatImpl {

   public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
       List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
-    return getSplits(scan, manifest, regionManifests, restoreDir, conf, null, 1);
-  }
-
-  public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
-      List<HRegionInfo> regionManifests, Path restoreDir,
-      Configuration conf, RegionSplitter.SplitAlgorithm sa, int numSplits) throws IOException {
     // load table descriptor
     TableDescriptor htd = manifest.getTableDescriptor();

@@ -359,36 +317,16 @@ public class TableSnapshotInputFormatImpl {
     for (HRegionInfo hri : regionManifests) {
       // load region descriptor

-      if (numSplits > 1) {
-        byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true);
-        for (int i = 0; i < sp.length - 1; i++) {
-          if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), sp[i],
-              sp[i + 1])) {
-            // compute HDFS locations from snapshot files (which will get the locations for
-            // referred hfiles)
-            List<String> hosts = getBestLocations(conf,
-                HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
-
-            int len = Math.min(3, hosts.size());
-            hosts = hosts.subList(0, len);
-            Scan boundedScan = new Scan(scan);
-            boundedScan.setStartRow(sp[i]);
-            boundedScan.setStopRow(sp[i + 1]);
-            splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir));
-          }
-        }
-      } else {
-        if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
-            hri.getEndKey())) {
-          // compute HDFS locations from snapshot files (which will get the locations for
-          // referred hfiles)
-          List<String> hosts = getBestLocations(conf,
-              HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
-
-          int len = Math.min(3, hosts.size());
-          hosts = hosts.subList(0, len);
-          splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
-        }
+      if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
+        hri.getEndKey())) {
+        // compute HDFS locations from snapshot files (which will get the locations for
+        // referred hfiles)
+        List<String> hosts = getBestLocations(conf,
+          HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
+
+        int len = Math.min(3, hosts.size());
+        hosts = hosts.subList(0, len);
+        splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
       }
     }

@@ -423,7 +361,7 @@ public class TableSnapshotInputFormatImpl {

     // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
     double cutoffMultiplier
       = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);

     double filterWeight = topHost.getWeight() * cutoffMultiplier;

@@ -457,35 +395,8 @@ public class TableSnapshotInputFormatImpl {
    */
   public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
       throws IOException {
-    setInput(conf, snapshotName, restoreDir, null, 1);
-  }
-
-  /**
-   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
-   * @param conf the job to configure
-   * @param snapshotName the name of the snapshot to read from
-   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
-   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
-   * After the job is finished, restoreDir can be deleted.
-   * @param numSplitsPerRegion how many input splits to generate per one region
-   * @param splitAlgo SplitAlgorithm to be used when generating InputSplits
-   * @throws IOException if an error occurs
-   */
-  public static void setInput(Configuration conf, String snapshotName, Path restoreDir,
-      RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion)
-      throws IOException {
     conf.set(SNAPSHOT_NAME_KEY, snapshotName);
-    if (numSplitsPerRegion < 1) {
-      throw new IllegalArgumentException("numSplits must be >= 1, " +
-        "illegal numSplits : " + numSplitsPerRegion);
-    }
-    if (splitAlgo == null && numSplitsPerRegion > 1) {
-      throw new IllegalArgumentException("Split algo can't be null when numSplits > 1");
-    }
-    if (splitAlgo != null) {
-      conf.set(SPLIT_ALGO, splitAlgo.getClass().getName());
-    }
-    conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion);
     Path rootDir = FSUtils.getRootDir(conf);
     FileSystem fs = rootDir.getFileSystem(conf);

@@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
 import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.RegionSplitter;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobClient;
@@ -138,20 +137,20 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
   @Test
   @Override
   public void testWithMockedMapReduceMultiRegion() throws Exception {
-    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 1, 10);
+    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10);
   }

   @Test
   @Override
   public void testWithMapReduceMultiRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 1, 10, false);
+    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false);
   }

   @Test
   @Override
   // run the MR job while HBase is offline
   public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 1, 10, true);
+    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true);
   }

   @Override
@@ -165,7 +164,7 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa

   @Override
   protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
-      int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception {
+      int numRegions, int expectedNumSplits) throws Exception {
     setupCluster();
     final TableName tableName = TableName.valueOf(name.getMethodName());
     try {
@@ -175,16 +174,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
       JobConf job = new JobConf(util.getConfiguration());
       Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);

-      if (numSplitsPerRegion > 1) {
-        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
-          COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-          NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(),
-          numSplitsPerRegion);
-      } else {
-        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
-          COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-          NullWritable.class, job, false, tmpTableDir);
-      }
+      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
+        COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+        NullWritable.class, job, false, tmpTableDir);

       // mapred doesn't support start and end keys? o.O
       verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());
@@ -233,16 +225,16 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa

   @Override
   protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
-      String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits,
+      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
       boolean shutdownCluster) throws Exception {
     doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
-      numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster);
+      numRegions, expectedNumSplits, shutdownCluster);
   }

   // this is also called by the IntegrationTestTableSnapshotInputFormat
   public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
       String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
-      int numSplitsPerRegion,int expectedNumSplits, boolean shutdownCluster) throws Exception {
+      int expectedNumSplits, boolean shutdownCluster) throws Exception {

     //create the table and snapshot
     createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);
@@ -259,16 +251,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
     org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
       TestTableSnapshotInputFormat.class);

-    if(numSplitsPerRegion > 1) {
-      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
-        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-        NullWritable.class, jobConf, true, tableDir, new RegionSplitter.UniformSplit(),
-        numSplitsPerRegion);
-    } else {
-      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
-        TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-        NullWritable.class, jobConf, true, tableDir);
-    }
+    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
+      TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+      NullWritable.class, jobConf, true, tableDir);

     jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
     jobConf.setNumReduceTasks(1);
@@ -78,10 +78,10 @@ public abstract class TableSnapshotInputFormatTestBase {
   }

   protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
-    int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception;
+    int numRegions, int expectedNumSplits) throws Exception;

   protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
-    String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits,
+    String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
     boolean shutdownCluster) throws Exception;

   protected abstract byte[] getStartRow();
@@ -90,33 +90,28 @@ public abstract class TableSnapshotInputFormatTestBase {

   @Test
   public void testWithMockedMapReduceSingleRegion() throws Exception {
-    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1, 1);
+    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1);
   }

   @Test
   public void testWithMockedMapReduceMultiRegion() throws Exception {
-    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 1, 8);
+    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8);
   }

   @Test
   public void testWithMapReduceSingleRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, 1, false);
+    testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false);
   }

   @Test
   public void testWithMapReduceMultiRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 1, 8, false);
-  }
-
-  @Test
-  public void testWithMapReduceMultipleMappersPerRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 5, 50, false);
+    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false);
   }

   @Test
   // run the MR job while HBase is offline
   public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
-    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 1, 8, true);
+    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true);
   }

   // Test that snapshot restore does not create back references in the HBase root dir.
@@ -164,13 +159,13 @@ public abstract class TableSnapshotInputFormatTestBase {
     String snapshotName, Path tmpTableDir) throws Exception;

   protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName,
-      int numRegions, int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception {
+      int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception {
     setupCluster();
     try {
       Path tableDir = util.getDataTestDirOnTestFS(snapshotName);
       TableName tableName = TableName.valueOf("testWithMapReduce");
       testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions,
-        numSplitsPerRegion, expectedNumSplits, shutdownCluster);
+        expectedNumSplits, shutdownCluster);
     } finally {
       tearDownCluster();
     }
@@ -64,7 +64,6 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.RegionSplitter;

 @Category({VerySlowMapReduceTests.class, LargeTests.class})
 public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
@@ -210,7 +209,7 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa

   @Override
   public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
-      int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception {
+      int numRegions, int expectedNumSplits) throws Exception {
     setupCluster();
     final TableName tableName = TableName.valueOf(name.getMethodName());
     try {
@@ -221,16 +220,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
       Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName);
       Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

-      if (numSplitsPerRegion > 1) {
-        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
-          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-          NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(),
-          numSplitsPerRegion);
-      } else {
-        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
-          scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-          NullWritable.class, job, false, tmpTableDir);
-      }
+      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+        NullWritable.class, job, false, tmpTableDir);

       verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

@@ -332,16 +324,16 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa

   @Override
   protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
-      String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion,
-      int expectedNumSplits, boolean shutdownCluster) throws Exception {
+      String snapshotName, Path tableDir, int numRegions, int expectedNumSplits,
+      boolean shutdownCluster) throws Exception {
     doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
-      numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster);
+      numRegions, expectedNumSplits, shutdownCluster);
   }

   // this is also called by the IntegrationTestTableSnapshotInputFormat
   public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
       String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
-      int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception {
+      int expectedNumSplits, boolean shutdownCluster) throws Exception {

     LOG.info("testing with MapReduce");

@@ -360,18 +352,11 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa

     job.setJarByClass(util.getClass());
     TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
       TestTableSnapshotInputFormat.class);

-    if (numSplitsPerRegion > 1) {
-      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
-        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-        NullWritable.class, job, true, tableDir, new RegionSplitter.UniformSplit(),
-        numSplitsPerRegion);
-    } else {
-      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
-        scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
-        NullWritable.class, job, true, tableDir);
-    }
+    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
+      scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
+      NullWritable.class, job, true, tableDir);

     job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
     job.setNumReduceTasks(1);
@@ -53,8 +53,6 @@ public class ClientSideRegionScanner extends AbstractClientScanner {
     // region is immutable, set isolation level
     scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);

-    htd = TableDescriptorBuilder.newBuilder(htd).setReadOnly(true).build();
-
     // open region from the snapshot directory
     this.region = HRegion.openHRegion(conf, fs, rootDir, hri, htd, null, null, null);

@@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.ClusterStatus.Option;
 import org.apache.hadoop.hbase.HBaseConfiguration;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.MetaTableAccessor;
@@ -182,17 +181,6 @@ public class RegionSplitter {
      */
     byte[][] split(int numRegions);

-    /**
-     * Some MapReduce jobs may want to run multiple mappers per region,
-     * this is intended for such usecase.
-     *
-     * @param start first row (inclusive)
-     * @param end last row (exclusive)
-     * @param numSplits number of splits to generate
-     * @param inclusive whether start and end are returned as split points
-     */
-    byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);
-
     /**
      * In HBase, the first row is represented by an empty byte array. This might
      * cause problems with your split algorithm or row printing. All your APIs
@@ -930,39 +918,6 @@ public class RegionSplitter {
       return convertToBytes(splits);
     }

-    @Override
-    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
-      BigInteger s = convertToBigInteger(start);
-      BigInteger e = convertToBigInteger(end);
-
-      Preconditions.checkArgument(e.compareTo(s) > 0,
-          "last row (%s) is configured less than first row (%s)", rowToStr(end),
-          end);
-      // +1 to range because the last row is inclusive
-      BigInteger range = e.subtract(s).add(BigInteger.ONE);
-      Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
-          "split granularity (%s) is greater than the range (%s)", numSplits, range);
-
-      BigInteger[] splits = new BigInteger[numSplits - 1];
-      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
-      for (int i = 1; i < numSplits; i++) {
-        // NOTE: this means the last region gets all the slop.
-        // This is not a big deal if we're assuming n << MAXHEX
-        splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger
-            .valueOf(i)));
-      }
-
-      if (inclusive) {
-        BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
-        inclusiveSplitPoints[0] = convertToBigInteger(start);
-        inclusiveSplitPoints[numSplits] = convertToBigInteger(end);
-        System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
-        return convertToBytes(inclusiveSplitPoints);
-      } else {
-        return convertToBytes(splits);
-      }
-    }
-
     public byte[] firstRow() {
       return convertToByte(firstRowInt);
     }
@@ -1106,32 +1061,6 @@ public class RegionSplitter {
       return splits == null? null: Arrays.copyOfRange(splits, 1, splits.length - 1);
     }

-    @Override
-    public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
-      if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {
-        start = firstRowBytes;
-      }
-      if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {
-        end = lastRowBytes;
-      }
-      Preconditions.checkArgument(
-          Bytes.compareTo(end, start) > 0,
-          "last row (%s) is configured less than first row (%s)",
-          Bytes.toStringBinary(end),
-          Bytes.toStringBinary(start));
-
-      byte[][] splits = Bytes.split(start, end, true,
-          numSplits - 1);
-      Preconditions.checkState(splits != null,
-          "Could not calculate input splits with given user input: " + this);
-      if (inclusive) {
-        return splits;
-      } else {
-        // remove endpoints, which are included in the splits list
-        return Arrays.copyOfRange(splits, 1, splits.length - 1);
-      }
-    }
-
     @Override
     public byte[] firstRow() {
       return firstRowBytes;
@@ -168,16 +168,6 @@ public class TestRegionSplitter {
     // Halfway between df... and ff... should be ef....
     splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
     assertArrayEquals(splitPoint,"efffffff".getBytes());
-
-    // Check splitting region with multiple mappers per region
-    byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false);
-    assertEquals(2, splits.length);
-    assertArrayEquals(splits[0], "10000000".getBytes());
-    assertArrayEquals(splits[1], "20000000".getBytes());
-
-    splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true);
-    assertEquals(3, splits.length);
-    assertArrayEquals(splits[1], "10000000".getBytes());
   }

   /**
@@ -225,16 +215,6 @@ public class TestRegionSplitter {

     splitPoint = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'b'});
     assertArrayEquals(splitPoint, new byte[] { 'a', 'a', 'a', (byte) 0x80 });
-
-    // Check splitting region with multiple mappers per region
-    byte[][] splits = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'd'}, 3, false);
-    assertEquals(2, splits.length);
-    assertArrayEquals(splits[0], new byte[]{'a', 'a', 'b'});
-    assertArrayEquals(splits[1], new byte[]{'a', 'a', 'c'});
-
-    splits = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'e'}, 2, true);
-    assertEquals(3, splits.length);
-    assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c'});
   }

   @Test