HBASE-23693 Split failure may cause region hole and data loss when use zk assign (#1071)

Signed-off-by: stack <stack@apache.org>
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
thangTang 2020-02-11 00:57:30 +08:00 committed by Wellington Chevreuil
parent bb49aa205c
commit 942bb77d84
2 changed files with 73 additions and 7 deletions

MetaTableAccessor.java

@@ -840,6 +840,20 @@ public class MetaTableAccessor {
         cell.getValueOffset(), cell.getValueLength());
   }
 
+  /**
+   * Returns the daughter regions by reading the corresponding columns of the catalog table
+   * Result.
+   * @param connection connection we're using
+   * @param parent region information of parent
+   * @return a pair of HRegionInfo or PairOfSameType(null, null) if the region is not a split
+   *         parent
+   */
+  public static PairOfSameType<HRegionInfo> getDaughterRegionsFromParent(
+      final Connection connection, HRegionInfo parent) throws IOException {
+    Result parentResult = getRegionResult(connection, parent.getRegionName());
+    return getDaughterRegions(parentResult);
+  }
+
   /**
    * Returns the daughter regions by reading the corresponding columns of the catalog table
    * Result.
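
For reference, a rough usage sketch of the new helper (not part of this commit): it reads the parent's row from hbase:meta and returns the daughter regions recorded there, or PairOfSameType(null, null) when the region is not a split parent. The wrapper class and method name below are illustrative assumptions, only the MetaTableAccessor call comes from this patch.

import java.io.IOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.util.PairOfSameType;

// Illustrative sketch: check whether hbase:meta still records daughters for a split parent.
public final class DaughterCheckExample {
  static boolean hasRecordedDaughters(Connection connection, HRegionInfo parent)
      throws IOException {
    PairOfSameType<HRegionInfo> daughters =
        MetaTableAccessor.getDaughterRegionsFromParent(connection, parent);
    // PairOfSameType(null, null) means the region is not a split parent.
    return daughters.getFirst() != null || daughters.getSecond() != null;
  }
}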

RegionStates.java

@@ -44,13 +44,17 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableStateManager;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
+import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ConfigUtil;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.PairOfSameType;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.zookeeper.KeeperException;
@@ -737,11 +741,13 @@ public class RegionStates {
   public List<HRegionInfo> serverOffline(final ZooKeeperWatcher watcher, final ServerName sn) {
     // Offline all regions on this server not already in transition.
     List<HRegionInfo> rits = new ArrayList<HRegionInfo>();
-    Set<HRegionInfo> regionsToClean = new HashSet<HRegionInfo>();
+    Set<Pair<HRegionInfo, HRegionInfo>> regionsToClean =
+        new HashSet<Pair<HRegionInfo, HRegionInfo>>();
     // Offline regions outside the loop and synchronized block to avoid
     // ConcurrentModificationException and deadlock in case of meta anassigned,
     // but RegionState a blocked.
     Set<HRegionInfo> regionsToOffline = new HashSet<HRegionInfo>();
+    Map<String, HRegionInfo> daughter2Parent = new HashMap<>();
     synchronized (this) {
       Set<HRegionInfo> assignedRegions = serverHoldings.get(sn);
       if (assignedRegions == null) {
@@ -758,8 +764,20 @@ public class RegionStates {
           // Delete the ZNode if exists
           ZKAssign.deleteNodeFailSilent(watcher, region);
           regionsToOffline.add(region);
+          PairOfSameType<HRegionInfo> daughterRegions =
+              MetaTableAccessor.getDaughterRegionsFromParent(this.server.getConnection(), region);
+          if (daughterRegions != null) {
+            if (daughterRegions.getFirst() != null) {
+              daughter2Parent.put(daughterRegions.getFirst().getEncodedName(), region);
+            }
+            if (daughterRegions.getSecond() != null) {
+              daughter2Parent.put(daughterRegions.getSecond().getEncodedName(), region);
+            }
+          }
         } catch (KeeperException ke) {
           server.abort("Unexpected ZK exception deleting node " + region, ke);
+        } catch (IOException e) {
+          LOG.warn("get daughter from meta exception " + region, e);
         }
       }
     }
@@ -783,10 +801,20 @@ public class RegionStates {
           LOG.info("Found region in " + state +
             " to be reassigned by ServerCrashProcedure for " + sn);
           rits.add(hri);
-        } else if(state.isSplittingNew() || state.isMergingNew()) {
-          LOG.info("Offline/Cleanup region if no meta entry exists, hri: " + hri +
-            " state: " + state);
-          regionsToClean.add(state.getRegion());
+        } else if (state.isSplittingNew() || state.isMergingNew()) {
+          LOG.info(
+            "Offline/Cleanup region if no meta entry exists, hri: " + hri + " state: " + state);
+          if (daughter2Parent.containsKey(hri.getEncodedName())) {
+            HRegionInfo parent = daughter2Parent.get(hri.getEncodedName());
+            HRegionInfo info = getHRIFromMeta(parent);
+            if (info != null && info.isSplit() && info.isOffline()) {
+              regionsToClean.add(Pair.newPair(state.getRegion(), info));
+            } else {
+              regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+            }
+          } else {
+            regionsToClean.add(Pair.<HRegionInfo, HRegionInfo>newPair(state.getRegion(), null));
+          }
         } else {
           LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
         }
@@ -803,6 +831,19 @@ public class RegionStates {
     return rits;
   }
 
+  private HRegionInfo getHRIFromMeta(HRegionInfo parent) {
+    Result result = null;
+    try {
+      result =
+          MetaTableAccessor.getRegionResult(this.server.getConnection(), parent.getRegionName());
+      HRegionInfo info = MetaTableAccessor.getHRegionInfo(result);
+      return info;
+    } catch (IOException e) {
+      LOG.error("got exception when query meta with region " + parent.getEncodedName(), e);
+      return null;
+    }
+  }
+
   /**
    * This method does an RPC to hbase:meta. Do not call this method with a lock/synchronize held.
    * In ZK mode we rollback and hence cleanup daughters/merged region. We also cleanup if
@@ -810,12 +851,14 @@
    *
    * @param hris The hris to check if empty in hbase:meta and if so, clean them up.
    */
-  private void cleanFailedSplitMergeRegions(Set<HRegionInfo> hris) {
+  private void cleanFailedSplitMergeRegions(Set<Pair<HRegionInfo, HRegionInfo>> hris) {
     if (hris.isEmpty()) {
       return;
     }
-    for (HRegionInfo hri : hris) {
+    for (Pair<HRegionInfo, HRegionInfo> hriPair : hris) {
+      HRegionInfo hri = hriPair.getFirst();
+      HRegionInfo parentInfo = hriPair.getSecond();
       // This is RPC to meta table. It is done while we have a synchronize on
       // regionstates. No progress will be made if meta is not available at this time.
       // This is a cleanup task. Not critical.
@@ -829,6 +872,15 @@
       if (regionPair != null) {
         MetaTableAccessor.deleteRegion(this.server.getConnection(), hri);
       }
+      if (parentInfo != null) {
+        List<Mutation> mutations = new ArrayList<Mutation>();
+        HRegionInfo copyOfParent = new HRegionInfo(parentInfo);
+        copyOfParent.setOffline(false);
+        copyOfParent.setSplit(false);
+        Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
+        mutations.add(putParent);
+        MetaTableAccessor.mutateMetaTable(this.server.getConnection(), mutations);
+      }
       LOG.debug("Cleaning up HDFS since no meta entry exists, hri: " + hri);
       FSUtils.deleteRegionDir(server.getConfiguration(), hri);
     }
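
For illustration, a minimal verification sketch (not part of this commit): after cleanFailedSplitMergeRegions has written the "un-split" parent back, its hbase:meta row should no longer carry the split/offline flags. The class and method names below are hypothetical; only the MetaTableAccessor calls appear in the patch above.

import java.io.IOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;

// Hypothetical helper: re-read the parent row from hbase:meta and confirm the rollback.
public final class SplitRollbackCheckExample {
  static boolean parentRestoredInMeta(Connection connection, HRegionInfo parent)
      throws IOException {
    Result r = MetaTableAccessor.getRegionResult(connection, parent.getRegionName());
    HRegionInfo info = MetaTableAccessor.getHRegionInfo(r);
    // A restored parent is present again and is marked neither split nor offline.
    return info != null && !info.isSplit() && !info.isOffline();
  }
}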