HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
parent
ec7141720f
commit
2a3ac5b33c
|
@ -139,6 +139,8 @@ public abstract class TableInputFormatBase
|
|||
private TableRecordReader tableRecordReader = null;
|
||||
/** The underlying {@link Connection} of the table. */
|
||||
private Connection connection;
|
||||
/** Used to generate splits based on region size. */
|
||||
private RegionSizeCalculator regionSizeCalculator;
|
||||
|
||||
|
||||
/** The reverse DNS lookup cache mapping: IPAddress => HostName */
|
||||
|
@ -288,8 +290,11 @@ public abstract class TableInputFormatBase
|
|||
* @throws IOException throws IOException
|
||||
*/
|
||||
private List<InputSplit> oneInputSplitPerRegion() throws IOException {
|
||||
RegionSizeCalculator sizeCalculator =
|
||||
createRegionSizeCalculator(getRegionLocator(), getAdmin());
|
||||
if (regionSizeCalculator == null) {
|
||||
// Initialize here rather than with the other resources because this involves
|
||||
// a full scan of meta, which can be heavy. We might as well only do it if/when necessary.
|
||||
regionSizeCalculator = createRegionSizeCalculator(getRegionLocator(), getAdmin());
|
||||
}
|
||||
|
||||
TableName tableName = getTable().getName();
|
||||
|
||||
|
@ -302,7 +307,7 @@ public abstract class TableInputFormatBase
|
|||
throw new IOException("Expecting at least one region.");
|
||||
}
|
||||
List<InputSplit> splits = new ArrayList<>(1);
|
||||
long regionSize = sizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
|
||||
long regionSize = regionSizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
|
||||
// In the table input format for single table we do not need to
|
||||
// store the scan object in table split because it can be memory intensive and redundant
|
||||
// information to what is already stored in conf SCAN. See HBASE-25212
|
||||
|
@ -345,7 +350,7 @@ public abstract class TableInputFormatBase
|
|||
|
||||
byte[] regionName = location.getRegion().getRegionName();
|
||||
String encodedRegionName = location.getRegion().getEncodedName();
|
||||
long regionSize = sizeCalculator.getRegionSize(regionName);
|
||||
long regionSize = regionSizeCalculator.getRegionSize(regionName);
|
||||
// In the table input format for single table we do not need to
|
||||
// store the scan object in table split because it can be memory intensive and redundant
|
||||
// information to what is already stored in conf SCAN. See HBASE-25212
|
||||
|
@ -597,6 +602,7 @@ public abstract class TableInputFormatBase
|
|||
this.regionLocator = connection.getRegionLocator(tableName);
|
||||
this.admin = connection.getAdmin();
|
||||
this.connection = connection;
|
||||
this.regionSizeCalculator = null;
|
||||
}
|
||||
|
||||
@InterfaceAudience.Private
|
||||
|
@ -664,6 +670,7 @@ public abstract class TableInputFormatBase
|
|||
table = null;
|
||||
regionLocator = null;
|
||||
connection = null;
|
||||
regionSizeCalculator = null;
|
||||
}
|
||||
|
||||
private void close(Closeable... closables) throws IOException {
|
||||
|
|
|
@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
|
|||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyBoolean;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -56,6 +57,7 @@ import org.apache.hadoop.mapreduce.JobContext;
|
|||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.mockito.Mockito;
|
||||
import org.mockito.invocation.InvocationOnMock;
|
||||
import org.mockito.stubbing.Answer;
|
||||
|
||||
|
@ -66,6 +68,34 @@ public class TestTableInputFormatBase {
|
|||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestTableInputFormatBase.class);
|
||||
|
||||
@Test
|
||||
public void testReuseRegionSizeCalculator() throws IOException {
|
||||
JobContext context = mock(JobContext.class);
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
conf.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL,
|
||||
ConnectionForMergeTesting.class.getName());
|
||||
conf.set(TableInputFormat.INPUT_TABLE, "testTable");
|
||||
conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
|
||||
when(context.getConfiguration()).thenReturn(conf);
|
||||
|
||||
TableInputFormat format = Mockito.spy(new TableInputFormatForMergeTesting());
|
||||
format.setConf(conf);
|
||||
// initialize so that table is set, otherwise cloneOnFinish
|
||||
// will be true and each getSplits call will re-initialize everything
|
||||
format.initialize(context);
|
||||
format.getSplits(context);
|
||||
format.getSplits(context);
|
||||
|
||||
// re-initialize which will cause the next getSplits call to create a new RegionSizeCalculator
|
||||
format.initialize(context);
|
||||
format.getSplits(context);
|
||||
format.getSplits(context);
|
||||
|
||||
// should only be 2 despite calling getSplits 4 times
|
||||
Mockito.verify(format, Mockito.times(2))
|
||||
.createRegionSizeCalculator(Mockito.any(), Mockito.any());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableInputFormatBaseReverseDNSForIPv6()
|
||||
throws UnknownHostException {
|
||||
|
|
Loading…
Reference in New Issue