HBASE-21965 Fix failed split and merge transactions that have failed to roll back
This commit is contained in:
parent
b0de9d8391
commit
8a6b8e81aa
|
@ -19,10 +19,12 @@ package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
|
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
|
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
|
||||||
|
@ -172,4 +174,18 @@ public class HBaseHbck implements Hbck {
|
||||||
throw new IOException(se);
|
throw new IOException(se);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, MasterProtos.RegionErrorType>
|
||||||
|
getFailedSplitMergeLegacyRegions(List<TableName> tableNames) throws IOException {
|
||||||
|
try {
|
||||||
|
MasterProtos.GetFailedSplitMergeLegacyRegionsResponse response =
|
||||||
|
this.hbck.getFailedSplitMergeLegacyRegions(rpcControllerFactory.newController(),
|
||||||
|
RequestConverter.toGetFailedSplitMergeLegacyRegionsRequest(tableNames));
|
||||||
|
return response.getErrorsMap();
|
||||||
|
} catch (ServiceException se) {
|
||||||
|
LOG.debug("get failed split/merge legacy regions failed", se);
|
||||||
|
throw new IOException(se);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,12 +20,15 @@ package org.apache.hadoop.hbase.client;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.Abortable;
|
import org.apache.hadoop.hbase.Abortable;
|
||||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call
|
* Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call
|
||||||
|
@ -106,4 +109,16 @@ public interface Hbck extends Abortable, Closeable {
|
||||||
|
|
||||||
List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
|
List<Long> scheduleServerCrashProcedure(List<HBaseProtos.ServerName> serverNames)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method is to get the regions which left by failed split/merge procedures for a certain
|
||||||
|
* table. There are two kinds of region this method will return. One is orphan regions left on FS,
|
||||||
|
* which left because split/merge procedure crashed before updating meta. And the other one is
|
||||||
|
* unassigned split daughter region or merged region, which left because split/merge procedure
|
||||||
|
* crashed before assignment.
|
||||||
|
* @param tableName table to check
|
||||||
|
* @return Map of problematic regions
|
||||||
|
*/
|
||||||
|
Map<String, MasterProtos.RegionErrorType>
|
||||||
|
getFailedSplitMergeLegacyRegions(List<TableName> tableName) throws IOException;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1884,6 +1884,15 @@ public final class RequestConverter {
|
||||||
return b.addAllServerName(serverNames).build();
|
return b.addAllServerName(serverNames).build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static MasterProtos.GetFailedSplitMergeLegacyRegionsRequest
|
||||||
|
toGetFailedSplitMergeLegacyRegionsRequest(List<TableName> tableNames) {
|
||||||
|
MasterProtos.GetFailedSplitMergeLegacyRegionsRequest.Builder b =
|
||||||
|
MasterProtos.GetFailedSplitMergeLegacyRegionsRequest.newBuilder();
|
||||||
|
List<HBaseProtos.TableName> protoTableNames = tableNames.stream()
|
||||||
|
.map(tableName -> ProtobufUtil.toProtoTableName(tableName)).collect(Collectors.toList());
|
||||||
|
return b.addAllTable(protoTableNames).build();
|
||||||
|
}
|
||||||
|
|
||||||
private static List<RegionSpecifier> toEncodedRegionNameRegionSpecifiers(
|
private static List<RegionSpecifier> toEncodedRegionNameRegionSpecifiers(
|
||||||
List<String> encodedRegionNames) {
|
List<String> encodedRegionNames) {
|
||||||
return encodedRegionNames.stream().
|
return encodedRegionNames.stream().
|
||||||
|
|
|
@ -1093,6 +1093,19 @@ message ScheduleServerCrashProcedureResponse {
|
||||||
repeated uint64 pid = 1;
|
repeated uint64 pid = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message GetFailedSplitMergeLegacyRegionsRequest {
|
||||||
|
repeated TableName table = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum RegionErrorType {
|
||||||
|
DAUGHTER_MERGED_REGION_NOT_ONLINE = 0;
|
||||||
|
ORPHAN_REGION_ON_FS = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message GetFailedSplitMergeLegacyRegionsResponse {
|
||||||
|
map<string, RegionErrorType> errors = 1;
|
||||||
|
}
|
||||||
|
|
||||||
service HbckService {
|
service HbckService {
|
||||||
/** Update state of the table in meta only*/
|
/** Update state of the table in meta only*/
|
||||||
rpc SetTableStateInMeta(SetTableStateInMetaRequest)
|
rpc SetTableStateInMeta(SetTableStateInMetaRequest)
|
||||||
|
@ -1123,4 +1136,7 @@ service HbckService {
|
||||||
/** Schedule a ServerCrashProcedure to help recover a crash server */
|
/** Schedule a ServerCrashProcedure to help recover a crash server */
|
||||||
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
|
rpc ScheduleServerCrashProcedure(ScheduleServerCrashProcedureRequest)
|
||||||
returns(ScheduleServerCrashProcedureResponse);
|
returns(ScheduleServerCrashProcedureResponse);
|
||||||
|
|
||||||
|
rpc getFailedSplitMergeLegacyRegions(GetFailedSplitMergeLegacyRegionsRequest)
|
||||||
|
returns(GetFailedSplitMergeLegacyRegionsResponse);
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,12 +27,14 @@ import java.net.InetSocketAddress;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
|
import org.apache.hadoop.hbase.ClusterMetricsBuilder;
|
||||||
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
||||||
|
@ -49,6 +51,7 @@ import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||||
import org.apache.hadoop.hbase.client.MasterSwitchType;
|
import org.apache.hadoop.hbase.client.MasterSwitchType;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
import org.apache.hadoop.hbase.client.Table;
|
import org.apache.hadoop.hbase.client.Table;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
import org.apache.hadoop.hbase.client.TableState;
|
import org.apache.hadoop.hbase.client.TableState;
|
||||||
|
@ -66,7 +69,10 @@ import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcServerFactory;
|
import org.apache.hadoop.hbase.ipc.RpcServerFactory;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
|
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
|
||||||
import org.apache.hadoop.hbase.ipc.ServerRpcController;
|
import org.apache.hadoop.hbase.ipc.ServerRpcController;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
|
||||||
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
|
||||||
import org.apache.hadoop.hbase.master.locking.LockProcedure;
|
import org.apache.hadoop.hbase.master.locking.LockProcedure;
|
||||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
|
||||||
|
@ -86,6 +92,7 @@ import org.apache.hadoop.hbase.quotas.MasterQuotaManager;
|
||||||
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
|
import org.apache.hadoop.hbase.quotas.QuotaObserverChore;
|
||||||
import org.apache.hadoop.hbase.quotas.QuotaUtil;
|
import org.apache.hadoop.hbase.quotas.QuotaUtil;
|
||||||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
|
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshot;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
|
||||||
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
|
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
|
||||||
import org.apache.hadoop.hbase.regionserver.RpcSchedulerFactory;
|
import org.apache.hadoop.hbase.regionserver.RpcSchedulerFactory;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
|
@ -106,8 +113,10 @@ import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
|
||||||
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||||
|
import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
|
import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
|
||||||
import org.apache.hadoop.hbase.util.Pair;
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.apache.hadoop.hbase.util.PairOfSameType;
|
||||||
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
|
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
|
||||||
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
|
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
@ -2469,6 +2478,163 @@ public class MasterRpcServices extends RSRpcServices
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MasterProtos.GetFailedSplitMergeLegacyRegionsResponse getFailedSplitMergeLegacyRegions(
|
||||||
|
RpcController controller, MasterProtos.GetFailedSplitMergeLegacyRegionsRequest request)
|
||||||
|
throws ServiceException {
|
||||||
|
List<HBaseProtos.TableName> tables = request.getTableList();
|
||||||
|
|
||||||
|
Map<String, MasterProtos.RegionErrorType> errorRegions = new HashMap<>();
|
||||||
|
try {
|
||||||
|
for (HBaseProtos.TableName tableName : tables) {
|
||||||
|
errorRegions.putAll(getFailedSplitMergeLegacyRegions(ProtobufUtil.toTableName(tableName)));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServiceException(e);
|
||||||
|
}
|
||||||
|
return MasterProtos.GetFailedSplitMergeLegacyRegionsResponse.newBuilder()
|
||||||
|
.putAllErrors(errorRegions).build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, MasterProtos.RegionErrorType>
|
||||||
|
getFailedSplitMergeLegacyRegions(TableName tableName) throws IOException {
|
||||||
|
if (!MetaTableAccessor.tableExists(master.getConnection(), tableName)) {
|
||||||
|
throw new IOException("table " + tableName.getNameAsString() + " doesn't exist");
|
||||||
|
}
|
||||||
|
if (!MetaTableAccessor.getTableState(master.getConnection(), tableName).isEnabled()) {
|
||||||
|
throw new IOException(
|
||||||
|
"table " + tableName.getNameAsString() + " is not enabled yet");
|
||||||
|
}
|
||||||
|
final Map<String, MasterProtos.RegionErrorType> problemRegions = new HashMap<>();
|
||||||
|
|
||||||
|
// Case 1. find orphan region on fs
|
||||||
|
// orphan regions may due to a failed split region procedure, which daughter regions are created
|
||||||
|
// then the procedure is aborted. Or merged region is created then the procedure is aborted.
|
||||||
|
List<String> orphanRegions = findOrphanRegionOnFS(tableName);
|
||||||
|
orphanRegions.stream().forEach(
|
||||||
|
region -> problemRegions.put(region, MasterProtos.RegionErrorType.ORPHAN_REGION_ON_FS));
|
||||||
|
|
||||||
|
// Case 2. find unassigned daughter regions or merged regions
|
||||||
|
List<String> unassignedDaughterOrMergedRegions =
|
||||||
|
findUnassignedDaughterOrMergedRegions(tableName);
|
||||||
|
unassignedDaughterOrMergedRegions.stream().forEach(region -> problemRegions.put(region,
|
||||||
|
MasterProtos.RegionErrorType.DAUGHTER_MERGED_REGION_NOT_ONLINE));
|
||||||
|
|
||||||
|
// if these regions in problemRegions are currently handled by SplitTableRegionProcedure or
|
||||||
|
// MergeTableRegionsProcedure, we should remove them from this map
|
||||||
|
master.getProcedures().stream().filter(p -> !(p.isFinished() || p.isBypass())).forEach(p -> {
|
||||||
|
if (p instanceof SplitTableRegionProcedure) {
|
||||||
|
problemRegions
|
||||||
|
.remove(((SplitTableRegionProcedure) p).getDaughterOneRI().getRegionNameAsString());
|
||||||
|
problemRegions
|
||||||
|
.remove(((SplitTableRegionProcedure) p).getDaughterTwoRI().getRegionNameAsString());
|
||||||
|
} else if (p instanceof MergeTableRegionsProcedure) {
|
||||||
|
problemRegions
|
||||||
|
.remove(((MergeTableRegionsProcedure) p).getMergedRegion().getRegionNameAsString());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// check if regions are still problematic now
|
||||||
|
checkRegionStillProblematic(problemRegions, tableName);
|
||||||
|
return problemRegions;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void checkRegionStillProblematic(
|
||||||
|
Map<String, MasterProtos.RegionErrorType> problemRegions, TableName tableName)
|
||||||
|
throws IOException {
|
||||||
|
Iterator<Map.Entry<String, MasterProtos.RegionErrorType>> iterator =
|
||||||
|
problemRegions.entrySet().iterator();
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
Map.Entry<String, MasterProtos.RegionErrorType> entry = iterator.next();
|
||||||
|
Result r = MetaTableAccessor.getRegionResult(master.getConnection(),
|
||||||
|
Bytes.toBytesBinary(entry.getKey()));
|
||||||
|
switch (entry.getValue()) {
|
||||||
|
case ORPHAN_REGION_ON_FS:
|
||||||
|
// region is build for this directory, it is not a problematic region any more
|
||||||
|
if (r != null) {
|
||||||
|
iterator.remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case DAUGHTER_MERGED_REGION_NOT_ONLINE:
|
||||||
|
RegionState.State state = RegionStateStore.getRegionState(r, 0);
|
||||||
|
if (!state.matches(RegionState.State.CLOSED, RegionState.State.SPLITTING_NEW,
|
||||||
|
RegionState.State.MERGED)) {
|
||||||
|
iterator.remove();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IOException("there should be no problematic region of this type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> findUnassignedDaughterOrMergedRegions(TableName tableName)
|
||||||
|
throws IOException {
|
||||||
|
Set<String> checkRegions = new HashSet<>();
|
||||||
|
Map<String, RegionState.State> regionStates = new HashMap<>();
|
||||||
|
Map<String, RegionInfo> regionInfos = new HashMap<>();
|
||||||
|
|
||||||
|
MetaTableAccessor.scanMeta(master.getConnection(), tableName,
|
||||||
|
MetaTableAccessor.QueryType.REGION, Integer.MAX_VALUE, r -> {
|
||||||
|
RegionInfo regionInfo = MetaTableAccessor.getRegionInfo(r);
|
||||||
|
regionInfos.put(regionInfo.getRegionNameAsString(), regionInfo);
|
||||||
|
RegionState.State state = RegionStateStore.getRegionState(r, 0);
|
||||||
|
regionStates.put(regionInfo.getEncodedName(), state);
|
||||||
|
if (regionInfo.isSplitParent()) {
|
||||||
|
PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(r);
|
||||||
|
checkRegions.add(daughters.getFirst().getRegionNameAsString());
|
||||||
|
checkRegions.add(daughters.getSecond().getRegionNameAsString());
|
||||||
|
} else if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null
|
||||||
|
|| r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER) != null) {
|
||||||
|
checkRegions.add(regionInfo.getRegionNameAsString());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
// find unassigned merged or split daughter region
|
||||||
|
return checkRegions.stream().map(regionName -> regionInfos.get(regionName))
|
||||||
|
.filter(regionInfo -> !regionInfo.isSplitParent())
|
||||||
|
.filter(regionInfo -> !regionStates.get(regionInfo.getEncodedName())
|
||||||
|
.matches(RegionState.State.OPEN))
|
||||||
|
.map(regionInfo -> regionInfo.getRegionNameAsString()).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> findOrphanRegionOnFS(TableName tableName) throws IOException {
|
||||||
|
// get available regions from meta, merged region should be consider available
|
||||||
|
HashSet<String> regionsInMeta = new HashSet<>();
|
||||||
|
MetaTableAccessor.scanMeta(master.getConnection(), tableName,
|
||||||
|
MetaTableAccessor.QueryType.REGION, Integer.MAX_VALUE, r -> {
|
||||||
|
RegionInfo regionInfo = MetaTableAccessor.getRegionInfo(r);
|
||||||
|
regionsInMeta.add(regionInfo.getEncodedName());
|
||||||
|
if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null
|
||||||
|
|| r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEB_QUALIFIER) != null) {
|
||||||
|
PairOfSameType<RegionInfo> mergedRegions = MetaTableAccessor.getMergeRegions(r);
|
||||||
|
regionsInMeta.add(mergedRegions.getFirst().getEncodedName());
|
||||||
|
regionsInMeta.add(mergedRegions.getSecond().getEncodedName());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
// get regionInfo from fs
|
||||||
|
Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
|
||||||
|
FileStatus[] regions =
|
||||||
|
master.getFileSystem().listStatus(tableDir, path -> !path.getName().startsWith("."));
|
||||||
|
HashMap<String, String> regionNames = new HashMap<>();
|
||||||
|
for (FileStatus region : regions) {
|
||||||
|
RegionInfo regionInfo =
|
||||||
|
HRegionFileSystem.loadRegionInfoFileContent(master.getFileSystem(), region.getPath());
|
||||||
|
regionNames.put(regionInfo.getEncodedName(), regionInfo.getRegionNameAsString());
|
||||||
|
}
|
||||||
|
Iterator<Map.Entry<String, String>> regionIterator = regionNames.entrySet().iterator();
|
||||||
|
while (regionIterator.hasNext()) {
|
||||||
|
Map.Entry<String, String> region = regionIterator.next();
|
||||||
|
if (regionsInMeta.contains(region.getKey())) {
|
||||||
|
regionIterator.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ArrayList<>(regionNames.values());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SwitchRpcThrottleResponse switchRpcThrottle(RpcController controller,
|
public SwitchRpcThrottleResponse switchRpcThrottle(RpcController controller,
|
||||||
SwitchRpcThrottleRequest request) throws ServiceException {
|
SwitchRpcThrottleRequest request) throws ServiceException {
|
||||||
|
|
|
@ -93,8 +93,8 @@ public class SplitTableRegionProcedure
|
||||||
extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
|
extends AbstractStateMachineRegionProcedure<SplitTableRegionState> {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
|
private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class);
|
||||||
private Boolean traceEnabled = null;
|
private Boolean traceEnabled = null;
|
||||||
private RegionInfo daughter_1_RI;
|
private RegionInfo daughterOneRI;
|
||||||
private RegionInfo daughter_2_RI;
|
private RegionInfo daughterTwoRI;
|
||||||
private byte[] bestSplitRow;
|
private byte[] bestSplitRow;
|
||||||
private RegionSplitPolicy splitPolicy;
|
private RegionSplitPolicy splitPolicy;
|
||||||
|
|
||||||
|
@ -113,13 +113,13 @@ public class SplitTableRegionProcedure
|
||||||
checkSplittable(env, regionToSplit, bestSplitRow);
|
checkSplittable(env, regionToSplit, bestSplitRow);
|
||||||
final TableName table = regionToSplit.getTable();
|
final TableName table = regionToSplit.getTable();
|
||||||
final long rid = getDaughterRegionIdTimestamp(regionToSplit);
|
final long rid = getDaughterRegionIdTimestamp(regionToSplit);
|
||||||
this.daughter_1_RI = RegionInfoBuilder.newBuilder(table)
|
this.daughterOneRI = RegionInfoBuilder.newBuilder(table)
|
||||||
.setStartKey(regionToSplit.getStartKey())
|
.setStartKey(regionToSplit.getStartKey())
|
||||||
.setEndKey(bestSplitRow)
|
.setEndKey(bestSplitRow)
|
||||||
.setSplit(false)
|
.setSplit(false)
|
||||||
.setRegionId(rid)
|
.setRegionId(rid)
|
||||||
.build();
|
.build();
|
||||||
this.daughter_2_RI = RegionInfoBuilder.newBuilder(table)
|
this.daughterTwoRI = RegionInfoBuilder.newBuilder(table)
|
||||||
.setStartKey(bestSplitRow)
|
.setStartKey(bestSplitRow)
|
||||||
.setEndKey(regionToSplit.getEndKey())
|
.setEndKey(regionToSplit.getEndKey())
|
||||||
.setSplit(false)
|
.setSplit(false)
|
||||||
|
@ -140,7 +140,7 @@ public class SplitTableRegionProcedure
|
||||||
@Override
|
@Override
|
||||||
protected LockState acquireLock(final MasterProcedureEnv env) {
|
protected LockState acquireLock(final MasterProcedureEnv env) {
|
||||||
if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
|
if (env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(),
|
||||||
daughter_1_RI, daughter_2_RI)) {
|
daughterOneRI, daughterTwoRI)) {
|
||||||
try {
|
try {
|
||||||
LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
|
LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -153,8 +153,18 @@ public class SplitTableRegionProcedure
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void releaseLock(final MasterProcedureEnv env) {
|
protected void releaseLock(final MasterProcedureEnv env) {
|
||||||
env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughter_1_RI,
|
env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI,
|
||||||
daughter_2_RI);
|
daughterTwoRI);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public RegionInfo getDaughterOneRI() {
|
||||||
|
return daughterOneRI;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public RegionInfo getDaughterTwoRI() {
|
||||||
|
return daughterTwoRI;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -416,8 +426,8 @@ public class SplitTableRegionProcedure
|
||||||
MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
|
MasterProcedureProtos.SplitTableRegionStateData.newBuilder()
|
||||||
.setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
|
.setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
|
||||||
.setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
|
.setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion()))
|
||||||
.addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI))
|
.addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI))
|
||||||
.addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI));
|
.addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI));
|
||||||
serializer.serialize(splitTableRegionMsg.build());
|
serializer.serialize(splitTableRegionMsg.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,8 +441,8 @@ public class SplitTableRegionProcedure
|
||||||
setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
|
setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo()));
|
||||||
setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
|
setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo()));
|
||||||
assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
|
assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2);
|
||||||
daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
|
daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0));
|
||||||
daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
|
daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -443,9 +453,9 @@ public class SplitTableRegionProcedure
|
||||||
sb.append(", parent=");
|
sb.append(", parent=");
|
||||||
sb.append(getParentRegion().getShortNameToLog());
|
sb.append(getParentRegion().getShortNameToLog());
|
||||||
sb.append(", daughterA=");
|
sb.append(", daughterA=");
|
||||||
sb.append(daughter_1_RI.getShortNameToLog());
|
sb.append(daughterOneRI.getShortNameToLog());
|
||||||
sb.append(", daughterB=");
|
sb.append(", daughterB=");
|
||||||
sb.append(daughter_2_RI.getShortNameToLog());
|
sb.append(daughterTwoRI.getShortNameToLog());
|
||||||
}
|
}
|
||||||
|
|
||||||
private RegionInfo getParentRegion() {
|
private RegionInfo getParentRegion() {
|
||||||
|
@ -463,7 +473,7 @@ public class SplitTableRegionProcedure
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] getSplitRow() {
|
private byte[] getSplitRow() {
|
||||||
return daughter_2_RI.getStartKey();
|
return daughterTwoRI.getStartKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
|
private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED };
|
||||||
|
@ -595,17 +605,17 @@ public class SplitTableRegionProcedure
|
||||||
Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
|
Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs);
|
||||||
|
|
||||||
assertReferenceFileCount(fs, expectedReferences.getFirst(),
|
assertReferenceFileCount(fs, expectedReferences.getFirst(),
|
||||||
regionFs.getSplitsDir(daughter_1_RI));
|
regionFs.getSplitsDir(daughterOneRI));
|
||||||
//Move the files from the temporary .splits to the final /table/region directory
|
//Move the files from the temporary .splits to the final /table/region directory
|
||||||
regionFs.commitDaughterRegion(daughter_1_RI);
|
regionFs.commitDaughterRegion(daughterOneRI);
|
||||||
assertReferenceFileCount(fs, expectedReferences.getFirst(),
|
assertReferenceFileCount(fs, expectedReferences.getFirst(),
|
||||||
new Path(tabledir, daughter_1_RI.getEncodedName()));
|
new Path(tabledir, daughterOneRI.getEncodedName()));
|
||||||
|
|
||||||
assertReferenceFileCount(fs, expectedReferences.getSecond(),
|
assertReferenceFileCount(fs, expectedReferences.getSecond(),
|
||||||
regionFs.getSplitsDir(daughter_2_RI));
|
regionFs.getSplitsDir(daughterTwoRI));
|
||||||
regionFs.commitDaughterRegion(daughter_2_RI);
|
regionFs.commitDaughterRegion(daughterTwoRI);
|
||||||
assertReferenceFileCount(fs, expectedReferences.getSecond(),
|
assertReferenceFileCount(fs, expectedReferences.getSecond(),
|
||||||
new Path(tabledir, daughter_2_RI.getEncodedName()));
|
new Path(tabledir, daughterTwoRI.getEncodedName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -745,9 +755,9 @@ public class SplitTableRegionProcedure
|
||||||
|
|
||||||
final byte[] splitRow = getSplitRow();
|
final byte[] splitRow = getSplitRow();
|
||||||
final String familyName = Bytes.toString(family);
|
final String familyName = Bytes.toString(family);
|
||||||
final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow,
|
final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow,
|
||||||
false, splitPolicy);
|
false, splitPolicy);
|
||||||
final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow,
|
final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow,
|
||||||
true, splitPolicy);
|
true, splitPolicy);
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
|
LOG.debug("pid=" + getProcId() + " splitting complete for store file: " +
|
||||||
|
@ -812,7 +822,7 @@ public class SplitTableRegionProcedure
|
||||||
*/
|
*/
|
||||||
private void updateMeta(final MasterProcedureEnv env) throws IOException {
|
private void updateMeta(final MasterProcedureEnv env) throws IOException {
|
||||||
env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
|
env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env),
|
||||||
daughter_1_RI, daughter_2_RI);
|
daughterOneRI, daughterTwoRI);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -834,7 +844,7 @@ public class SplitTableRegionProcedure
|
||||||
private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
|
private void postSplitRegion(final MasterProcedureEnv env) throws IOException {
|
||||||
final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
|
||||||
if (cpHost != null) {
|
if (cpHost != null) {
|
||||||
cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser());
|
cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -852,8 +862,8 @@ public class SplitTableRegionProcedure
|
||||||
private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
|
private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
|
List<RegionInfo> hris = new ArrayList<RegionInfo>(2);
|
||||||
hris.add(daughter_1_RI);
|
hris.add(daughterOneRI);
|
||||||
hris.add(daughter_2_RI);
|
hris.add(daughterTwoRI);
|
||||||
return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris,
|
return AssignmentManagerUtil.createAssignProceduresForOpeningNewRegions(env, hris,
|
||||||
getRegionReplication(env), getParentRegionServerName(env));
|
getRegionReplication(env), getParentRegionServerName(env));
|
||||||
}
|
}
|
||||||
|
@ -868,9 +878,9 @@ public class SplitTableRegionProcedure
|
||||||
long maxSequenceId =
|
long maxSequenceId =
|
||||||
WALSplitter.getMaxRegionSequenceId(walFS, getWALRegionDir(env, getParentRegion()));
|
WALSplitter.getMaxRegionSequenceId(walFS, getWALRegionDir(env, getParentRegion()));
|
||||||
if (maxSequenceId > 0) {
|
if (maxSequenceId > 0) {
|
||||||
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_1_RI),
|
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughterOneRI),
|
||||||
maxSequenceId);
|
maxSequenceId);
|
||||||
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughter_2_RI),
|
WALSplitter.writeRegionSequenceIdFile(walFS, getWALRegionDir(env, daughterTwoRI),
|
||||||
maxSequenceId);
|
maxSequenceId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -807,6 +807,12 @@ public class HRegionFileSystem {
|
||||||
Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
|
Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
|
||||||
// Move the tmp dir in the expected location
|
// Move the tmp dir in the expected location
|
||||||
if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
|
if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
|
||||||
|
|
||||||
|
// Write HRI to a file in case we need to recover hbase:meta
|
||||||
|
Path regionInfoFile = new Path(mergedRegionTmpDir, REGION_INFO_FILE);
|
||||||
|
byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
|
||||||
|
writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
|
||||||
|
|
||||||
if (!fs.rename(mergedRegionTmpDir, regionDir)) {
|
if (!fs.rename(mergedRegionTmpDir, regionDir)) {
|
||||||
throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
|
throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
|
||||||
+ regionDir);
|
+ regionDir);
|
||||||
|
|
|
@ -18,17 +18,32 @@
|
||||||
package org.apache.hadoop.hbase.client;
|
package org.apache.hadoop.hbase.client;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.Coprocessor;
|
||||||
|
import org.apache.hadoop.hbase.CoprocessorEnvironment;
|
||||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
|
||||||
|
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||||
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
import org.apache.hadoop.hbase.master.RegionState;
|
import org.apache.hadoop.hbase.master.RegionState;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.SplitTableRegionProcedure;
|
||||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
|
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
|
||||||
import org.apache.hadoop.hbase.procedure2.Procedure;
|
import org.apache.hadoop.hbase.procedure2.Procedure;
|
||||||
|
@ -40,6 +55,7 @@ import org.apache.hadoop.hbase.testclassification.ClientTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.ClassRule;
|
import org.junit.ClassRule;
|
||||||
|
@ -57,6 +73,7 @@ import org.slf4j.LoggerFactory;
|
||||||
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
|
import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
|
* Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
|
||||||
|
@ -102,6 +119,12 @@ public class TestHbck {
|
||||||
TEST_UTIL.createMultiRegionTable(TABLE_NAME, Bytes.toBytes("family1"), 5);
|
TEST_UTIL.createMultiRegionTable(TABLE_NAME, Bytes.toBytes("family1"), 5);
|
||||||
procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
|
procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
|
||||||
ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
|
ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
|
||||||
|
FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
|
||||||
|
FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
|
@ -204,6 +227,101 @@ public class TestHbck {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecoverMergeAfterMetaUpdated() throws Exception {
|
||||||
|
String testTable = async ? "mergeTestAsync" : "mergeTestSync";
|
||||||
|
TEST_UTIL.createMultiRegionTable(TableName.valueOf(testTable), Bytes.toBytes("family1"), 5);
|
||||||
|
TEST_UTIL.loadTable(TEST_UTIL.getConnection().getTable(TableName.valueOf(testTable)),
|
||||||
|
Bytes.toBytes("family1"), true);
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
Hbck hbck = getHbck();
|
||||||
|
FailingMergeAfterMetaUpdatedMasterObserver observer = master.getMasterCoprocessorHost()
|
||||||
|
.findCoprocessor(FailingMergeAfterMetaUpdatedMasterObserver.class);
|
||||||
|
try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
|
||||||
|
List<RegionInfo> regions = admin.getRegions(TableName.valueOf(testTable));
|
||||||
|
admin.mergeRegionsAsync(regions.get(0).getRegionName(), regions.get(1).getRegionName(), true);
|
||||||
|
assertNotNull(observer);
|
||||||
|
observer.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||||
|
Map<String, MasterProtos.RegionErrorType> result =
|
||||||
|
hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(0, result.size());
|
||||||
|
Optional<Procedure<?>> procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
|
||||||
|
.stream().filter(p -> p instanceof MergeTableRegionsProcedure).findAny();
|
||||||
|
Assert.assertTrue(procedure.isPresent());
|
||||||
|
hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false);
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(1, result.size());
|
||||||
|
hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream()
|
||||||
|
.map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList()));
|
||||||
|
ProcedureTestingUtility.waitNoProcedureRunning(master.getMasterProcedureExecutor());
|
||||||
|
// now the state should be fixed
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(0, result.size());
|
||||||
|
} catch (InterruptedException ie) {
|
||||||
|
throw new IOException(ie);
|
||||||
|
} finally {
|
||||||
|
observer.resetLatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecoverSplitAfterMetaUpdated() throws Exception {
|
||||||
|
String testTable = async ? "splitTestAsync" : "splitTestSync";
|
||||||
|
TEST_UTIL.createMultiRegionTable(TableName.valueOf(testTable), Bytes.toBytes("family1"), 5);
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
Hbck hbck = getHbck();
|
||||||
|
FailingSplitAfterMetaUpdatedMasterObserver observer = master.getMasterCoprocessorHost()
|
||||||
|
.findCoprocessor(FailingSplitAfterMetaUpdatedMasterObserver.class);
|
||||||
|
assertNotNull(observer);
|
||||||
|
try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
|
||||||
|
byte[] splitKey = Bytes.toBytes("bcd");
|
||||||
|
admin.split(TableName.valueOf(testTable), splitKey);
|
||||||
|
observer.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||||
|
Map<String, MasterProtos.RegionErrorType> result =
|
||||||
|
hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
// since there is a split procedure work on the region, thus this check should return a empty
|
||||||
|
// map.
|
||||||
|
Assert.assertEquals(0, result.size());
|
||||||
|
Optional<Procedure<?>> procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
|
||||||
|
.stream().filter(p -> p instanceof SplitTableRegionProcedure).findAny();
|
||||||
|
Assert.assertTrue(procedure.isPresent());
|
||||||
|
hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false);
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(2, result.size());
|
||||||
|
hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream()
|
||||||
|
.map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList()));
|
||||||
|
ProcedureTestingUtility.waitNoProcedureRunning(master.getMasterProcedureExecutor());
|
||||||
|
// now the state should be fixed
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(0, result.size());
|
||||||
|
|
||||||
|
//split one of the daughter region again
|
||||||
|
observer.resetLatch();
|
||||||
|
byte[] splitKey2 = Bytes.toBytes("bcde");
|
||||||
|
|
||||||
|
admin.split(TableName.valueOf(testTable), splitKey2);
|
||||||
|
observer.latch.await(5000, TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
procedure = TEST_UTIL.getHBaseCluster().getMaster().getProcedures()
|
||||||
|
.stream().filter(p -> p instanceof SplitTableRegionProcedure).findAny();
|
||||||
|
Assert.assertTrue(procedure.isPresent());
|
||||||
|
hbck.bypassProcedure(Arrays.asList(procedure.get().getProcId()), 5, true, false);
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(2, result.size());
|
||||||
|
hbck.assigns(Arrays.asList(result.keySet().toArray(new String[0])).stream()
|
||||||
|
.map(regionName -> regionName.split("\\.")[1]).collect(Collectors.toList()));
|
||||||
|
ProcedureTestingUtility.waitNoProcedureRunning(master.getMasterProcedureExecutor());
|
||||||
|
// now the state should be fixed
|
||||||
|
result = hbck.getFailedSplitMergeLegacyRegions(Arrays.asList(TableName.valueOf(testTable)));
|
||||||
|
Assert.assertEquals(0, result.size());
|
||||||
|
} catch (InterruptedException ie) {
|
||||||
|
throw new IOException(ie);
|
||||||
|
} finally {
|
||||||
|
observer.resetLatch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testScheduleSCP() throws Exception {
|
public void testScheduleSCP() throws Exception {
|
||||||
HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
|
HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
|
||||||
|
@ -223,6 +341,60 @@ public class TestHbck {
|
||||||
waitOnPids(pids);
|
waitOnPids(pids);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class FailingSplitAfterMetaUpdatedMasterObserver
|
||||||
|
implements MasterCoprocessor, MasterObserver {
|
||||||
|
public volatile CountDownLatch latch;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start(CoprocessorEnvironment e) throws IOException {
|
||||||
|
resetLatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<MasterObserver> getMasterObserver() {
|
||||||
|
return Optional.of(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
|
||||||
|
throws IOException {
|
||||||
|
LOG.info("I'm here");
|
||||||
|
latch.countDown();
|
||||||
|
throw new IOException("this procedure will fail at here forever");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void resetLatch() {
|
||||||
|
this.latch = new CountDownLatch(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class FailingMergeAfterMetaUpdatedMasterObserver
|
||||||
|
implements MasterCoprocessor, MasterObserver {
|
||||||
|
public volatile CountDownLatch latch;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void start(CoprocessorEnvironment e) throws IOException {
|
||||||
|
resetLatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Optional<MasterObserver> getMasterObserver() {
|
||||||
|
return Optional.of(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void resetLatch() {
|
||||||
|
this.latch = new CountDownLatch(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void postMergeRegionsCommitAction(
|
||||||
|
final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
|
||||||
|
final RegionInfo mergedRegion) throws IOException {
|
||||||
|
latch.countDown();
|
||||||
|
throw new IOException("this procedure will fail at here forever");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void waitOnPids(List<Long> pids) {
|
private void waitOnPids(List<Long> pids) {
|
||||||
TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
|
TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue