From 367dfabf0694a5c72dbd5c30092a1ff9e30fca5c Mon Sep 17 00:00:00 2001 From: Ashu Pachauri Date: Sat, 30 Sep 2017 01:43:10 -0700 Subject: [PATCH] Revert "HBASE-18814 Improve TableSnapshotInputFormat to allow more multiple mappers per region" due to wrong jira id. This reverts commit f20580a53083b69eec3d766cf2a1f99d0bff9747. --- ...tegrationTestTableSnapshotInputFormat.java | 4 +- .../hbase/mapred/TableMapReduceUtil.java | 38 ------ .../mapred/TableSnapshotInputFormat.java | 18 --- .../hbase/mapreduce/TableMapReduceUtil.java | 38 ------ .../mapreduce/TableSnapshotInputFormat.java | 24 +--- .../TableSnapshotInputFormatImpl.java | 113 ++---------------- .../mapred/TestTableSnapshotInputFormat.java | 41 ++----- .../TableSnapshotInputFormatTestBase.java | 23 ++-- .../TestTableSnapshotInputFormat.java | 39 ++---- .../hbase/client/ClientSideRegionScanner.java | 2 - .../hadoop/hbase/util/RegionSplitter.java | 71 ----------- .../hadoop/hbase/util/TestRegionSplitter.java | 20 ---- 12 files changed, 50 insertions(+), 381 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java index 2df1c4bff18..1a152e8887f 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/mapreduce/IntegrationTestTableSnapshotInputFormat.java @@ -151,7 +151,7 @@ public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase int expectedNumSplits = numRegions > 2 ? numRegions - 2 : numRegions; org.apache.hadoop.hbase.mapreduce.TestTableSnapshotInputFormat.doTestWithMapReduce(util, - tableName, snapshotName, START_ROW, END_ROW, tableDir, numRegions, 1, + tableName, snapshotName, START_ROW, END_ROW, tableDir, numRegions, expectedNumSplits, false); } else if (mr.equalsIgnoreCase(MAPRED_IMPLEMENTATION)) { /* @@ -165,7 +165,7 @@ public class IntegrationTestTableSnapshotInputFormat extends IntegrationTestBase int expectedNumSplits = numRegions; org.apache.hadoop.hbase.mapred.TestTableSnapshotInputFormat.doTestWithMapReduce(util, - tableName, snapshotName, MAPRED_START_ROW, MAPRED_END_ROW, tableDir, numRegions, 1, + tableName, snapshotName, MAPRED_START_ROW, MAPRED_END_ROW, tableDir, numRegions, expectedNumSplits, false); } else { throw new IllegalArgumentException("Unrecognized mapreduce implementation: " + mr +"."); diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java index 0427f50ffec..35dbf029232 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableMapReduceUtil.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.mapreduce.ResultSerialization; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.security.UserProvider; import org.apache.hadoop.hbase.security.token.TokenUtil; -import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; @@ -186,43 +185,6 @@ public class TableMapReduceUtil { org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(job); } - /** - * Sets up the job for reading from a table snapshot. It bypasses hbase servers - * and read directly from snapshot files. - * - * @param snapshotName The name of the snapshot (of a table) to read from. - * @param columns The columns to scan. - * @param mapper The mapper class to use. - * @param outputKeyClass The class of the output key. - * @param outputValueClass The class of the output value. - * @param jobConf The current job to adjust. Make sure the passed job is - * carrying all necessary HBase configuration. - * @param addDependencyJars upload HBase jars and jars for any of the configured - * job classes via the distributed cache (tmpjars). - * @param tmpRestoreDir a temporary directory to copy the snapshot files into. Current user should - * have write permissions to this directory, and this should not be a subdirectory of rootdir. - * After the job is finished, restore directory can be deleted. - * @param splitAlgo algorithm to split - * @param numSplitsPerRegion how many input splits to generate per one region - * @throws IOException When setting up the details fails. - * @see TableSnapshotInputFormat - */ - public static void initTableSnapshotMapJob(String snapshotName, String columns, - Class mapper, - Class outputKeyClass, - Class outputValueClass, JobConf jobConf, - boolean addDependencyJars, Path tmpRestoreDir, - RegionSplitter.SplitAlgorithm splitAlgo, - int numSplitsPerRegion) - throws IOException { - TableSnapshotInputFormat.setInput(jobConf, snapshotName, tmpRestoreDir, splitAlgo, - numSplitsPerRegion); - initTableMapJob(snapshotName, columns, mapper, outputKeyClass, outputValueClass, jobConf, - addDependencyJars, TableSnapshotInputFormat.class); - org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.resetCacheConfig(jobConf); - } - - /** * Use this before submitting a TableReduce job. It will * appropriately set up the JobConf. diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java index 50c26db5f73..a0dafe73e9f 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapred/TableSnapshotInputFormat.java @@ -26,13 +26,11 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl; -import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapreduce.Job; import java.io.DataInput; import java.io.DataOutput; @@ -165,20 +163,4 @@ public class TableSnapshotInputFormat implements InputFormat mapper, - Class outputKeyClass, - Class outputValueClass, Job job, - boolean addDependencyJars, Path tmpRestoreDir, - RegionSplitter.SplitAlgorithm splitAlgo, - int numSplitsPerRegion) - throws IOException { - TableSnapshotInputFormat.setInput(job, snapshotName, tmpRestoreDir, splitAlgo, - numSplitsPerRegion); - initTableMapperJob(snapshotName, scan, mapper, outputKeyClass, - outputValueClass, job, addDependencyJars, false, TableSnapshotInputFormat.class); - resetCacheConfig(job.getConfiguration()); - } - /** * Use this before submitting a Multi TableMap job. It will appropriately set * up the job. diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java index 37cbf85dfe2..82850545467 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormat.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -67,10 +66,8 @@ import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTe * } * *

- * Internally, this input format restores the snapshot into the given tmp directory. By default, - * and similar to {@link TableInputFormat} an InputSplit is created per region, but optionally you - * can run N mapper tasks per every region, in which case the region key range will be split to - * N sub-ranges and an InputSplit will be created per sub-range. The region is opened for reading + * Internally, this input format restores the snapshot into the given tmp directory. Similar to + * {@link TableInputFormat} an InputSplit is created per region. The region is opened for reading * from each RecordReader. An internal RegionScanner is used to execute the * {@link org.apache.hadoop.hbase.CellScanner} obtained from the user. *

@@ -219,21 +216,4 @@ public class TableSnapshotInputFormat extends InputFormat) - Class.forName(splitAlgoClassName)).newInstance(); - } catch (ClassNotFoundException e) { - throw new IOException("SplitAlgo class " + splitAlgoClassName + - " is not found", e); - } catch (InstantiationException e) { - throw new IOException("SplitAlgo class " + splitAlgoClassName + - " is not instantiable", e); - } catch (IllegalAccessException e) { - throw new IOException("SplitAlgo class " + splitAlgoClassName + - " is not instantiable", e); - } - } - - public static List getRegionInfosFromManifest(SnapshotManifest manifest) { List regionManifests = manifest.getRegionManifests(); if (regionManifests == null) { @@ -344,12 +308,6 @@ public class TableSnapshotInputFormatImpl { public static List getSplits(Scan scan, SnapshotManifest manifest, List regionManifests, Path restoreDir, Configuration conf) throws IOException { - return getSplits(scan, manifest, regionManifests, restoreDir, conf, null, 1); - } - - public static List getSplits(Scan scan, SnapshotManifest manifest, - List regionManifests, Path restoreDir, - Configuration conf, RegionSplitter.SplitAlgorithm sa, int numSplits) throws IOException { // load table descriptor TableDescriptor htd = manifest.getTableDescriptor(); @@ -359,36 +317,16 @@ public class TableSnapshotInputFormatImpl { for (HRegionInfo hri : regionManifests) { // load region descriptor - if (numSplits > 1) { - byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true); - for (int i = 0; i < sp.length - 1; i++) { - if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), sp[i], - sp[i + 1])) { - // compute HDFS locations from snapshot files (which will get the locations for - // referred hfiles) - List hosts = getBestLocations(conf, - HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir)); + if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(), + hri.getEndKey())) { + // compute HDFS locations from snapshot files (which will get the locations for + // referred hfiles) + List hosts = getBestLocations(conf, + HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir)); - int len = Math.min(3, hosts.size()); - hosts = hosts.subList(0, len); - Scan boundedScan = new Scan(scan); - boundedScan.setStartRow(sp[i]); - boundedScan.setStopRow(sp[i + 1]); - splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir)); - } - } - } else { - if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(), - hri.getEndKey())) { - // compute HDFS locations from snapshot files (which will get the locations for - // referred hfiles) - List hosts = getBestLocations(conf, - HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir)); - - int len = Math.min(3, hosts.size()); - hosts = hosts.subList(0, len); - splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir)); - } + int len = Math.min(3, hosts.size()); + hosts = hosts.subList(0, len); + splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir)); } } @@ -423,7 +361,7 @@ public class TableSnapshotInputFormatImpl { // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality double cutoffMultiplier - = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER); + = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER); double filterWeight = topHost.getWeight() * cutoffMultiplier; @@ -457,35 +395,8 @@ public class TableSnapshotInputFormatImpl { */ public static void setInput(Configuration conf, String snapshotName, Path restoreDir) throws IOException { - setInput(conf, snapshotName, restoreDir, null, 1); - } - - /** - * Configures the job to use TableSnapshotInputFormat to read from a snapshot. - * @param conf the job to configure - * @param snapshotName the name of the snapshot to read from - * @param restoreDir a temporary directory to restore the snapshot into. Current user should - * have write permissions to this directory, and this should not be a subdirectory of rootdir. - * After the job is finished, restoreDir can be deleted. - * @param numSplitsPerRegion how many input splits to generate per one region - * @param splitAlgo SplitAlgorithm to be used when generating InputSplits - * @throws IOException if an error occurs - */ - public static void setInput(Configuration conf, String snapshotName, Path restoreDir, - RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) - throws IOException { conf.set(SNAPSHOT_NAME_KEY, snapshotName); - if (numSplitsPerRegion < 1) { - throw new IllegalArgumentException("numSplits must be >= 1, " + - "illegal numSplits : " + numSplitsPerRegion); - } - if (splitAlgo == null && numSplitsPerRegion > 1) { - throw new IllegalArgumentException("Split algo can't be null when numSplits > 1"); - } - if (splitAlgo != null) { - conf.set(SPLIT_ALGO, splitAlgo.getClass().getName()); - } - conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion); + Path rootDir = FSUtils.getRootDir(conf); FileSystem fs = rootDir.getFileSystem(conf); diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java index be36b6a6513..1c72f2a1832 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapred/TestTableSnapshotInputFormat.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase; import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobClient; @@ -138,20 +137,20 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa @Test @Override public void testWithMockedMapReduceMultiRegion() throws Exception { - testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 1, 10); + testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 10); } @Test @Override public void testWithMapReduceMultiRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 1, 10, false); + testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 10, false); } @Test @Override // run the MR job while HBase is offline public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 1, 10, true); + testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 10, true); } @Override @@ -165,7 +164,7 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa @Override protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName, - int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception { + int numRegions, int expectedNumSplits) throws Exception { setupCluster(); final TableName tableName = TableName.valueOf(name.getMethodName()); try { @@ -175,16 +174,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa JobConf job = new JobConf(util.getConfiguration()); Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName); - if (numSplitsPerRegion > 1) { - TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, - COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(), - numSplitsPerRegion); - } else { - TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, - COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, false, tmpTableDir); - } + TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, + COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, + NullWritable.class, job, false, tmpTableDir); // mapred doesn't support start and end keys? o.O verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow()); @@ -233,16 +225,16 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa @Override protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName, - String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits, + String snapshotName, Path tableDir, int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception { doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir, - numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster); + numRegions, expectedNumSplits, shutdownCluster); } // this is also called by the IntegrationTestTableSnapshotInputFormat public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName, String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions, - int numSplitsPerRegion,int expectedNumSplits, boolean shutdownCluster) throws Exception { + int expectedNumSplits, boolean shutdownCluster) throws Exception { //create the table and snapshot createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions); @@ -259,16 +251,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf, TestTableSnapshotInputFormat.class); - if(numSplitsPerRegion > 1) { - TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS, - TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, jobConf, true, tableDir, new RegionSplitter.UniformSplit(), - numSplitsPerRegion); - } else { - TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS, - TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, jobConf, true, tableDir); - } + TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS, + TestTableSnapshotMapper.class, ImmutableBytesWritable.class, + NullWritable.class, jobConf, true, tableDir); jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class); jobConf.setNumReduceTasks(1); diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java index 362dca1963a..fa47253720e 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatTestBase.java @@ -78,10 +78,10 @@ public abstract class TableSnapshotInputFormatTestBase { } protected abstract void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName, - int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception; + int numRegions, int expectedNumSplits) throws Exception; protected abstract void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName, - String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits, + String snapshotName, Path tableDir, int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception; protected abstract byte[] getStartRow(); @@ -90,33 +90,28 @@ public abstract class TableSnapshotInputFormatTestBase { @Test public void testWithMockedMapReduceSingleRegion() throws Exception { - testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1, 1); + testWithMockedMapReduce(UTIL, "testWithMockedMapReduceSingleRegion", 1, 1); } @Test public void testWithMockedMapReduceMultiRegion() throws Exception { - testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 1, 8); + testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 8); } @Test public void testWithMapReduceSingleRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, 1, false); + testWithMapReduce(UTIL, "testWithMapReduceSingleRegion", 1, 1, false); } @Test public void testWithMapReduceMultiRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 1, 8, false); - } - - @Test - public void testWithMapReduceMultipleMappersPerRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 5, 50, false); + testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 8, false); } @Test // run the MR job while HBase is offline public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception { - testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 1, 8, true); + testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 8, true); } // Test that snapshot restore does not create back references in the HBase root dir. @@ -164,13 +159,13 @@ public abstract class TableSnapshotInputFormatTestBase { String snapshotName, Path tmpTableDir) throws Exception; protected void testWithMapReduce(HBaseTestingUtility util, String snapshotName, - int numRegions, int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception { + int numRegions, int expectedNumSplits, boolean shutdownCluster) throws Exception { setupCluster(); try { Path tableDir = util.getDataTestDirOnTestFS(snapshotName); TableName tableName = TableName.valueOf("testWithMapReduce"); testWithMapReduceImpl(util, tableName, snapshotName, tableDir, numRegions, - numSplitsPerRegion, expectedNumSplits, shutdownCluster); + expectedNumSplits, shutdownCluster); } finally { tearDownCluster(); } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java index 890eb2fe11a..028df983357 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java @@ -64,7 +64,6 @@ import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.util.FSUtils; -import org.apache.hadoop.hbase.util.RegionSplitter; @Category({VerySlowMapReduceTests.class, LargeTests.class}) public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase { @@ -210,7 +209,7 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa @Override public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName, - int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception { + int numRegions, int expectedNumSplits) throws Exception { setupCluster(); final TableName tableName = TableName.valueOf(name.getMethodName()); try { @@ -221,16 +220,9 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa Path tmpTableDir = util.getDataTestDirOnTestFS(snapshotName); Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan - if (numSplitsPerRegion > 1) { - TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, - scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(), - numSplitsPerRegion); - } else { - TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, - scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, false, tmpTableDir); - } + TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, + scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, + NullWritable.class, job, false, tmpTableDir); verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow()); @@ -332,16 +324,16 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa @Override protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName, - String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, - int expectedNumSplits, boolean shutdownCluster) throws Exception { + String snapshotName, Path tableDir, int numRegions, int expectedNumSplits, + boolean shutdownCluster) throws Exception { doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir, - numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster); + numRegions, expectedNumSplits, shutdownCluster); } // this is also called by the IntegrationTestTableSnapshotInputFormat public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName, String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions, - int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception { + int expectedNumSplits, boolean shutdownCluster) throws Exception { LOG.info("testing with MapReduce"); @@ -360,18 +352,11 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa job.setJarByClass(util.getClass()); TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), - TestTableSnapshotInputFormat.class); + TestTableSnapshotInputFormat.class); - if (numSplitsPerRegion > 1) { - TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, - scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, true, tableDir, new RegionSplitter.UniformSplit(), - numSplitsPerRegion); - } else { - TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, - scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, - NullWritable.class, job, true, tableDir); - } + TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, + scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class, + NullWritable.class, job, true, tableDir); job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class); job.setNumReduceTasks(1); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java index 141fcddd592..72a2ea47b83 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/ClientSideRegionScanner.java @@ -53,8 +53,6 @@ public class ClientSideRegionScanner extends AbstractClientScanner { // region is immutable, set isolation level scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED); - htd = TableDescriptorBuilder.newBuilder(htd).setReadOnly(true).build(); - // open region from the snapshot directory this.region = HRegion.openHRegion(conf, fs, rootDir, hri, htd, null, null, null); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java index 3ee593afca8..c31f72b0263 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.ClusterStatus.Option; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.MetaTableAccessor; @@ -182,17 +181,6 @@ public class RegionSplitter { */ byte[][] split(int numRegions); - /** - * Some MapReduce jobs may want to run multiple mappers per region, - * this is intended for such usecase. - * - * @param start first row (inclusive) - * @param end last row (exclusive) - * @param numSplits number of splits to generate - * @param inclusive whether start and end are returned as split points - */ - byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive); - /** * In HBase, the first row is represented by an empty byte array. This might * cause problems with your split algorithm or row printing. All your APIs @@ -930,39 +918,6 @@ public class RegionSplitter { return convertToBytes(splits); } - @Override - public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) { - BigInteger s = convertToBigInteger(start); - BigInteger e = convertToBigInteger(end); - - Preconditions.checkArgument(e.compareTo(s) > 0, - "last row (%s) is configured less than first row (%s)", rowToStr(end), - end); - // +1 to range because the last row is inclusive - BigInteger range = e.subtract(s).add(BigInteger.ONE); - Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0, - "split granularity (%s) is greater than the range (%s)", numSplits, range); - - BigInteger[] splits = new BigInteger[numSplits - 1]; - BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits)); - for (int i = 1; i < numSplits; i++) { - // NOTE: this means the last region gets all the slop. - // This is not a big deal if we're assuming n << MAXHEX - splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger - .valueOf(i))); - } - - if (inclusive) { - BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1]; - inclusiveSplitPoints[0] = convertToBigInteger(start); - inclusiveSplitPoints[numSplits] = convertToBigInteger(end); - System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length); - return convertToBytes(inclusiveSplitPoints); - } else { - return convertToBytes(splits); - } - } - public byte[] firstRow() { return convertToByte(firstRowInt); } @@ -1106,32 +1061,6 @@ public class RegionSplitter { return splits == null? null: Arrays.copyOfRange(splits, 1, splits.length - 1); } - @Override - public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) { - if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) { - start = firstRowBytes; - } - if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) { - end = lastRowBytes; - } - Preconditions.checkArgument( - Bytes.compareTo(end, start) > 0, - "last row (%s) is configured less than first row (%s)", - Bytes.toStringBinary(end), - Bytes.toStringBinary(start)); - - byte[][] splits = Bytes.split(start, end, true, - numSplits - 1); - Preconditions.checkState(splits != null, - "Could not calculate input splits with given user input: " + this); - if (inclusive) { - return splits; - } else { - // remove endpoints, which are included in the splits list - return Arrays.copyOfRange(splits, 1, splits.length - 1); - } - } - @Override public byte[] firstRow() { return firstRowBytes; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java index aa42616832f..19e048c01c9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java @@ -168,16 +168,6 @@ public class TestRegionSplitter { // Halfway between df... and ff... should be ef.... splitPoint = splitter.split("dfffffff".getBytes(), lastRow); assertArrayEquals(splitPoint,"efffffff".getBytes()); - - // Check splitting region with multiple mappers per region - byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false); - assertEquals(2, splits.length); - assertArrayEquals(splits[0], "10000000".getBytes()); - assertArrayEquals(splits[1], "20000000".getBytes()); - - splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true); - assertEquals(3, splits.length); - assertArrayEquals(splits[1], "10000000".getBytes()); } /** @@ -225,16 +215,6 @@ public class TestRegionSplitter { splitPoint = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'b'}); assertArrayEquals(splitPoint, new byte[] { 'a', 'a', 'a', (byte) 0x80 }); - - // Check splitting region with multiple mappers per region - byte[][] splits = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'd'}, 3, false); - assertEquals(2, splits.length); - assertArrayEquals(splits[0], new byte[]{'a', 'a', 'b'}); - assertArrayEquals(splits[1], new byte[]{'a', 'a', 'c'}); - - splits = splitter.split(new byte[] {'a', 'a', 'a'}, new byte[] {'a', 'a', 'e'}, 2, true); - assertEquals(3, splits.length); - assertArrayEquals(splits[1], new byte[] { 'a', 'a', 'c'}); } @Test