HBASE-11594 Unhandled NoNodeException in distributed log replay mode (Jeffrey Zhong)

This commit is contained in:
Andrew Purtell 2014-08-01 09:37:56 -07:00
parent 19e9b8aa52
commit b727d1e181
1 changed file with 32 additions and 24 deletions

View File

@@ -145,6 +145,7 @@ import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat; import org.apache.zookeeper.data.Stat;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@@ -2852,31 +2853,38 @@ public class HRegionServer extends HasThread implements
minSeqIdForLogReplay = storeSeqIdForReplay; minSeqIdForLogReplay = storeSeqIdForReplay;
} }
} }
long lastRecordedFlushedSequenceId = -1;
String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode,
region.getEncodedName());
// recovering-region level
byte[] data;
try { try {
data = ZKUtil.getData(zkw, nodePath); long lastRecordedFlushedSequenceId = -1;
} catch (InterruptedException e) { String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode,
throw new InterruptedIOException(); region.getEncodedName());
} // recovering-region level
if (data != null) { byte[] data;
lastRecordedFlushedSequenceId = SplitLogManager.parseLastFlushedSequenceIdFrom(data); try {
} data = ZKUtil.getData(zkw, nodePath);
if (data == null || lastRecordedFlushedSequenceId < minSeqIdForLogReplay) { } catch (InterruptedException e) {
ZKUtil.setData(zkw, nodePath, ZKUtil.positionToByteArray(minSeqIdForLogReplay)); throw new InterruptedIOException();
} }
if (previousRSName != null) { if (data != null) {
// one level deeper for the failed RS lastRecordedFlushedSequenceId = SplitLogManager.parseLastFlushedSequenceIdFrom(data);
nodePath = ZKUtil.joinZNode(nodePath, previousRSName); }
ZKUtil.setData(zkw, nodePath, if (data == null || lastRecordedFlushedSequenceId < minSeqIdForLogReplay) {
ZKUtil.regionSequenceIdsToByteArray(minSeqIdForLogReplay, maxSeqIdInStores)); ZKUtil.setData(zkw, nodePath, ZKUtil.positionToByteArray(minSeqIdForLogReplay));
LOG.debug("Update last flushed sequence id of region " + region.getEncodedName() + " for " }
+ previousRSName); if (previousRSName != null) {
} else { // one level deeper for the failed RS
LOG.warn("Can't find failed region server for recovering region " + region.getEncodedName()); nodePath = ZKUtil.joinZNode(nodePath, previousRSName);
ZKUtil.setData(zkw, nodePath,
ZKUtil.regionSequenceIdsToByteArray(minSeqIdForLogReplay, maxSeqIdInStores));
LOG.debug("Update last flushed sequence id of region " + region.getEncodedName() + " for "
+ previousRSName);
} else {
LOG.warn("Can't find failed region server for recovering region " +
region.getEncodedName());
}
} catch (NoNodeException ignore) {
LOG.debug("Region " + region.getEncodedName() +
" must have completed recovery because its recovery znode has been removed", ignore);
} }
} }