HBASE-20361 Non-successive TableInputSplits may wrongly be merged by auto balancing feature

Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
Yuki Tawara 2018-04-08 01:46:52 +09:00 committed by tedyu
parent f66343050f
commit c7593d14a9
2 changed files with 242 additions and 2 deletions

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.DNS;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
/** /**
* A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName}, * A base for {@link TableInputFormat}s. Receives a {@link Connection}, a {@link TableName},
@ -291,7 +292,7 @@ public abstract class TableInputFormatBase
*/ */
private List<InputSplit> oneInputSplitPerRegion() throws IOException { private List<InputSplit> oneInputSplitPerRegion() throws IOException {
RegionSizeCalculator sizeCalculator = RegionSizeCalculator sizeCalculator =
new RegionSizeCalculator(getRegionLocator(), getAdmin()); createRegionSizeCalculator(getRegionLocator(), getAdmin());
TableName tableName = getTable().getName(); TableName tableName = getTable().getName();
@ -478,7 +479,8 @@ public abstract class TableInputFormatBase
while (j < splits.size()) { while (j < splits.size()) {
TableSplit nextRegion = (TableSplit) splits.get(j); TableSplit nextRegion = (TableSplit) splits.get(j);
long nextRegionSize = nextRegion.getLength(); long nextRegionSize = nextRegion.getLength();
if (totalSize + nextRegionSize <= averageRegionSize) { if (totalSize + nextRegionSize <= averageRegionSize
&& Bytes.equals(splitEndKey, nextRegion.getStartRow())) {
totalSize = totalSize + nextRegionSize; totalSize = totalSize + nextRegionSize;
splitEndKey = nextRegion.getEndRow(); splitEndKey = nextRegion.getEndRow();
j++; j++;
@ -586,6 +588,12 @@ public abstract class TableInputFormatBase
this.connection = connection; this.connection = connection;
} }
/**
 * Factory for the {@link RegionSizeCalculator} used when splits are computed.
 * Exposed as an overridable hook so that tests can substitute their own calculator.
 *
 * @param locator region locator passed through to the calculator
 * @param admin admin handle passed through to the calculator
 * @return a newly constructed calculator bound to the given locator and admin
 * @throws IOException declared so that a failure while building the calculator can propagate
 */
@VisibleForTesting
protected RegionSizeCalculator createRegionSizeCalculator(RegionLocator locator, Admin admin)
    throws IOException {
  RegionSizeCalculator calculator = new RegionSizeCalculator(locator, admin);
  return calculator;
}
/** /**
* Gets the scan defining the actual details like columns etc. * Gets the scan defining the actual details like columns etc.
* *

View File

@ -18,15 +18,45 @@
package org.apache.hadoop.hbase.mapreduce; package org.apache.hadoop.hbase.mapreduce;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.net.Inet6Address; import java.net.Inet6Address;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableBuilder;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.mapreduce.JobContext;
import org.junit.ClassRule; import org.junit.ClassRule;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
@Category({SmallTests.class}) @Category({SmallTests.class})
public class TestTableInputFormatBase { public class TestTableInputFormatBase {
@ -55,4 +85,206 @@ public class TestTableInputFormatBase {
assertEquals("Should retrun the hostname for this host. Expected : " + assertEquals("Should retrun the hostname for this host. Expected : " +
localhost + " Actual : " + actualHostName, localhost, actualHostName); localhost + " Actual : " + actualHostName, localhost, actualHostName);
} }
/**
 * Checks that auto balancing merges only splits whose key ranges touch each other.
 * The input format under test drops the region ["b", "c"), which leaves the small
 * regions ["a", "b") and ["c", "d") next to each other in the split list even though
 * their key ranges are not contiguous.
 */
@Test
public void testNonSuccessiveSplitsAreNotMerged() throws IOException {
  Configuration conf = HBaseConfiguration.create();
  conf.set(TableInputFormat.INPUT_TABLE, "testTable");
  conf.setBoolean(TableInputFormatBase.MAPREDUCE_INPUT_AUTOBALANCE, true);
  conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL,
      ConnectionForMergeTesting.class.getName());

  JobContext context = mock(JobContext.class);
  when(context.getConfiguration()).thenReturn(conf);

  TableInputFormat tifExclude = new TableInputFormatForMergeTesting();
  tifExclude.setConf(conf);

  // One region (["b", "c")) is excluded and one pair (["o", "p") + ["p", "q")) is merged,
  // so two fewer splits than regions are expected. ["a", "b") and ["c", "d") must stay
  // separate because their key ranges are not adjacent.
  int regionCount = ConnectionForMergeTesting.START_KEYS.length;
  int expectedSplitCount = regionCount - 1 - 1;
  int actualSplitCount = tifExclude.getSplits(context).size();
  assertEquals(expectedSplitCount, actualSplitCount);
}
/**
 * {@link TableInputFormat} variant used by {@link #testNonSuccessiveSplitsAreNotMerged}.
 * It overrides {@link TableInputFormatBase#includeRegionInSplit} so that specific regions
 * are left out of the split list, and it serves the region size calculator obtained from
 * the test connection instead of building a real one.
 */
private static class TableInputFormatForMergeTesting extends TableInputFormat {
  private final byte[] prefixStartKey = Bytes.toBytes("b");
  private final byte[] prefixEndKey = Bytes.toBytes("c");
  // Captured in initializeTable and handed back from createRegionSizeCalculator.
  private RegionSizeCalculator sizeCalculator;

  /**
   * Exclude regions which contain rows starting with "b".
   *
   * @return {@code false} when the region's key range overlaps ["b", "c"), {@code true}
   *         otherwise
   */
  @Override
  protected boolean includeRegionInSplit(final byte[] startKey, final byte [] endKey) {
    // A region starting at or after "c" cannot hold rows with the "b" prefix.
    if (Bytes.compareTo(startKey, prefixEndKey) >= 0) {
      return true;
    }
    // The region starts before "c"; it overlaps the prefix range when it ends after "b"
    // or is the last region (empty end row).
    boolean reachesPastPrefixStart = Bytes.compareTo(prefixStartKey, endKey) < 0
        || Bytes.equals(endKey, HConstants.EMPTY_END_ROW);
    return !reachesPastPrefixStart;
  }

  /**
   * Performs the regular table initialization and then remembers the size calculator
   * supplied by the {@link ConnectionForMergeTesting} connection.
   */
  @Override
  protected void initializeTable(Connection connection, TableName tableName) throws IOException {
    super.initializeTable(connection, tableName);
    ConnectionForMergeTesting testConnection = (ConnectionForMergeTesting) connection;
    this.sizeCalculator = testConnection.getRegionSizeCalculator();
  }

  /**
   * Returns the calculator captured in {@link #initializeTable}; the locator and admin
   * arguments are ignored.
   */
  @Override
  protected RegionSizeCalculator createRegionSizeCalculator(RegionLocator locator, Admin admin)
      throws IOException {
    return this.sizeCalculator;
  }
}
/**
* Connection class to use in {@link #testNonSuccessiveSplitsAreNotMerged}.
* This class returns mocked {@link Table}, {@link RegionLocator}, {@link RegionSizeCalculator},
* and {@link Admin}.
*/
private static class ConnectionForMergeTesting implements Connection {
public static final byte[][] SPLITS = new byte[][] {
Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c"), Bytes.toBytes("d"),
Bytes.toBytes("e"), Bytes.toBytes("f"), Bytes.toBytes("g"), Bytes.toBytes("h"),
Bytes.toBytes("i"), Bytes.toBytes("j"), Bytes.toBytes("k"), Bytes.toBytes("l"),
Bytes.toBytes("m"), Bytes.toBytes("n"), Bytes.toBytes("o"), Bytes.toBytes("p"),
Bytes.toBytes("q"), Bytes.toBytes("r"), Bytes.toBytes("s"), Bytes.toBytes("t"),
Bytes.toBytes("u"), Bytes.toBytes("v"), Bytes.toBytes("w"), Bytes.toBytes("x"),
Bytes.toBytes("y"), Bytes.toBytes("z")
};
public static final byte[][] START_KEYS;
public static final byte[][] END_KEYS;
static {
START_KEYS = new byte[SPLITS.length + 1][];
START_KEYS[0] = HConstants.EMPTY_BYTE_ARRAY;
for (int i = 0; i < SPLITS.length; i++) {
START_KEYS[i + 1] = SPLITS[i];
}
END_KEYS = new byte[SPLITS.length + 1][];
for (int i = 0; i < SPLITS.length; i++) {
END_KEYS[i] = SPLITS[i];
}
END_KEYS[SPLITS.length] = HConstants.EMPTY_BYTE_ARRAY;
}
public static final Map<byte[], Long> SIZE_MAP = new TreeMap<>(Bytes.BYTES_COMPARATOR);
static {
for (byte[] startKey : START_KEYS) {
SIZE_MAP.put(startKey, 1024L * 1024L * 1024L);
}
SIZE_MAP.put(Bytes.toBytes("a"), 200L * 1024L * 1024L);
SIZE_MAP.put(Bytes.toBytes("b"), 200L * 1024L * 1024L);
SIZE_MAP.put(Bytes.toBytes("c"), 200L * 1024L * 1024L);
SIZE_MAP.put(Bytes.toBytes("o"), 200L * 1024L * 1024L);
SIZE_MAP.put(Bytes.toBytes("p"), 200L * 1024L * 1024L);
}
ConnectionForMergeTesting(Configuration conf, ExecutorService pool, User user)
throws IOException {
}
@Override
public void abort(String why, Throwable e) {
}
@Override
public boolean isAborted() {
return false;
}
@Override
public Configuration getConfiguration() {
throw new UnsupportedOperationException();
}
@Override
public Table getTable(TableName tableName) throws IOException {
Table table = mock(Table.class);
when(table.getName()).thenReturn(tableName);
return table;
}
@Override
public Table getTable(TableName tableName, ExecutorService pool) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BufferedMutator getBufferedMutator(TableName tableName) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BufferedMutator getBufferedMutator(BufferedMutatorParams params) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public RegionLocator getRegionLocator(TableName tableName) throws IOException {
final Map<byte[], HRegionLocation> locationMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for (byte[] startKey : START_KEYS) {
HRegionLocation hrl = new HRegionLocation(
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).build(),
ServerName.valueOf("localhost", 0, 0));
locationMap.put(startKey, hrl);
}
RegionLocator locator = mock(RegionLocator.class);
when(locator.getRegionLocation(any(byte [].class), anyBoolean())).
thenAnswer(new Answer<HRegionLocation>() {
@Override
public HRegionLocation answer(InvocationOnMock invocationOnMock) throws Throwable {
Object [] args = invocationOnMock.getArguments();
byte [] key = (byte [])args[0];
return locationMap.get(key);
}
});
when(locator.getStartEndKeys()).
thenReturn(new Pair<byte[][], byte[][]>(START_KEYS, END_KEYS));
return locator;
}
public RegionSizeCalculator getRegionSizeCalculator() {
RegionSizeCalculator sizeCalculator = mock(RegionSizeCalculator.class);
when(sizeCalculator.getRegionSize(any(byte[].class))).
thenAnswer(new Answer<Long>() {
@Override
public Long answer(InvocationOnMock invocationOnMock) throws Throwable {
Object [] args = invocationOnMock.getArguments();
byte [] regionId = (byte [])args[0];
byte[] startKey = RegionInfo.getStartKey(regionId);
return SIZE_MAP.get(startKey);
}
});
return sizeCalculator;
}
@Override
public Admin getAdmin() throws IOException {
Admin admin = mock(Admin.class);
// return non-null admin to pass null checks
return admin;
}
@Override
public void close() throws IOException {
}
@Override
public boolean isClosed() {
return false;
}
@Override
public TableBuilder getTableBuilder(TableName tableName, ExecutorService pool) {
throw new UnsupportedOperationException();
}
}
} }