HBASE-26878 TableInputFormatBase should cache RegionSizeCalculator (#4271)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Bryan Beaudreault 2022-03-24 17:54:49 -04:00 committed by GitHub
parent ec7141720f
commit 2a3ac5b33c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 4 deletions

View File

@ -139,6 +139,8 @@ public abstract class TableInputFormatBase
private TableRecordReader tableRecordReader = null;
/** The underlying {@link Connection} of the table. */
private Connection connection;
/** Used to generate splits based on region size. */
private RegionSizeCalculator regionSizeCalculator;
/** The reverse DNS lookup cache mapping: IPAddress => HostName */
@ -288,8 +290,11 @@ public abstract class TableInputFormatBase
* @throws IOException throws IOException
*/
private List<InputSplit> oneInputSplitPerRegion() throws IOException {
RegionSizeCalculator sizeCalculator =
createRegionSizeCalculator(getRegionLocator(), getAdmin());
if (regionSizeCalculator == null) {
// Initialize here rather than with the other resources because this involves
// a full scan of meta, which can be heavy. We might as well only do it if/when necessary.
regionSizeCalculator = createRegionSizeCalculator(getRegionLocator(), getAdmin());
}
TableName tableName = getTable().getName();
@ -302,7 +307,7 @@ public abstract class TableInputFormatBase
throw new IOException("Expecting at least one region.");
}
List<InputSplit> splits = new ArrayList<>(1);
long regionSize = sizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
long regionSize = regionSizeCalculator.getRegionSize(regLoc.getRegion().getRegionName());
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@ -345,7 +350,7 @@ public abstract class TableInputFormatBase
byte[] regionName = location.getRegion().getRegionName();
String encodedRegionName = location.getRegion().getEncodedName();
long regionSize = sizeCalculator.getRegionSize(regionName);
long regionSize = regionSizeCalculator.getRegionSize(regionName);
// In the table input format for single table we do not need to
// store the scan object in table split because it can be memory intensive and redundant
// information to what is already stored in conf SCAN. See HBASE-25212
@ -597,6 +602,7 @@ public abstract class TableInputFormatBase
this.regionLocator = connection.getRegionLocator(tableName);
this.admin = connection.getAdmin();
this.connection = connection;
this.regionSizeCalculator = null;
}
@InterfaceAudience.Private
@ -664,6 +670,7 @@ public abstract class TableInputFormatBase
table = null;
regionLocator = null;
connection = null;
regionSizeCalculator = null;
}
private void close(Closeable... closables) throws IOException {

View File

@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.IOException;
@ -56,6 +57,7 @@ import org.apache.hadoop.mapreduce.JobContext;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
@ -66,6 +68,34 @@ public class TestTableInputFormatBase {
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestTableInputFormatBase.class);
@Test
public void testReuseRegionSizeCalculator() throws IOException {
JobContext context = mock(JobContext.class);
Configuration conf = HBaseConfiguration.create();
conf.set(ConnectionUtils.HBASE_CLIENT_CONNECTION_IMPL,
ConnectionForMergeTesting.class.getName());
conf.set(TableInputFormat.INPUT_TABLE, "testTable");
conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
when(context.getConfiguration()).thenReturn(conf);
TableInputFormat format = Mockito.spy(new TableInputFormatForMergeTesting());
format.setConf(conf);
// initialize so that table is set, otherwise cloneOnFinish
// will be true and each getSplits call will re-initialize everything
format.initialize(context);
format.getSplits(context);
format.getSplits(context);
// re-initialize which will cause the next getSplits call to create a new RegionSizeCalculator
format.initialize(context);
format.getSplits(context);
format.getSplits(context);
// should only be 2 despite calling getSplits 4 times
Mockito.verify(format, Mockito.times(2))
.createRegionSizeCalculator(Mockito.any(), Mockito.any());
}
@Test
public void testTableInputFormatBaseReverseDNSForIPv6()
throws UnknownHostException {