HBASE-25323 Fix potential NPE when the ZooKeeper path of RegionServerTracker does not exist at startup (#2702)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Guanghao Zhang <zghao@apache.org>
This commit is contained in:
Qi Yu 2020-11-25 15:13:09 +08:00 committed by Guanghao Zhang
parent 6f07efb971
commit 3dd425abfa
3 changed files with 20 additions and 15 deletions

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master;
import java.io.IOException; import java.io.IOException;
import java.io.InterruptedIOException; import java.io.InterruptedIOException;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -35,6 +36,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKListener;
import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths; import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -129,22 +131,25 @@ public class RegionServerTracker extends ZKListener {
splittingServersFromWALDir.stream().filter(s -> !deadServersFromPE.contains(s)). splittingServersFromWALDir.stream().filter(s -> !deadServersFromPE.contains(s)).
forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s)); forEach(s -> LOG.error("{} has no matching ServerCrashProcedure", s));
//create ServerNode for all possible live servers from wal directory //create ServerNode for all possible live servers from wal directory
liveServersFromWALDir.stream() liveServersFromWALDir
.forEach(sn -> server.getAssignmentManager().getRegionStates().getOrCreateServer(sn)); .forEach(sn -> server.getAssignmentManager().getRegionStates().getOrCreateServer(sn));
watcher.registerListener(this); watcher.registerListener(this);
synchronized (this) { synchronized (this) {
List<String> servers = List<String> servers =
ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.getZNodePaths().rsZNode); ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.getZNodePaths().rsZNode);
if (null != servers) {
for (String n : servers) { for (String n : servers) {
Pair<ServerName, RegionServerInfo> pair = getServerInfo(n); Pair<ServerName, RegionServerInfo> pair = getServerInfo(n);
ServerName serverName = pair.getFirst(); ServerName serverName = pair.getFirst();
RegionServerInfo info = pair.getSecond(); RegionServerInfo info = pair.getSecond();
regionServers.add(serverName); regionServers.add(serverName);
ServerMetrics serverMetrics = info != null ? ServerMetricsBuilder.of(serverName, ServerMetrics serverMetrics = info != null ?
VersionInfoUtil.getVersionNumber(info.getVersionInfo()), ServerMetricsBuilder.of(serverName, VersionInfoUtil.getVersionNumber(info.getVersionInfo()),
info.getVersionInfo().getVersion()) : ServerMetricsBuilder.of(serverName); info.getVersionInfo().getVersion()) :
ServerMetricsBuilder.of(serverName);
serverManager.checkAndRecordNewServer(serverName, serverMetrics); serverManager.checkAndRecordNewServer(serverName, serverMetrics);
} }
}
serverManager.findDeadServersAndProcess(deadServersFromPE, liveServersFromWALDir); serverManager.findDeadServersAndProcess(deadServersFromPE, liveServersFromWALDir);
} }
} }
@ -163,8 +168,9 @@ public class RegionServerTracker extends ZKListener {
server.abort("Unexpected zk exception getting RS nodes", e); server.abort("Unexpected zk exception getting RS nodes", e);
return; return;
} }
Set<ServerName> servers = Set<ServerName> servers = CollectionUtils.isEmpty(names) ? Collections.emptySet() :
names.stream().map(ServerName::parseServerName).collect(Collectors.toSet()); names.stream().map(ServerName::parseServerName).collect(Collectors.toSet());
for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) { for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) {
ServerName sn = iter.next(); ServerName sn = iter.next();
if (!servers.contains(sn)) { if (!servers.contains(sn)) {

View File

@ -297,7 +297,7 @@ public class MasterAddressTracker extends ZKNodeTracker {
public static List<ServerName> getBackupMastersAndRenewWatch( public static List<ServerName> getBackupMastersAndRenewWatch(
ZKWatcher zkw) throws InterruptedIOException { ZKWatcher zkw) throws InterruptedIOException {
// Build Set of backup masters from ZK nodes // Build Set of backup masters from ZK nodes
List<String> backupMasterStrings = Collections.emptyList(); List<String> backupMasterStrings = null;
try { try {
backupMasterStrings = ZKUtil.listChildrenAndWatchForNewChildren(zkw, backupMasterStrings = ZKUtil.listChildrenAndWatchForNewChildren(zkw,
zkw.getZNodePaths().backupMasterAddressesZNode); zkw.getZNodePaths().backupMasterAddressesZNode);

View File

@ -445,16 +445,15 @@ public final class ZKUtil {
} catch(KeeperException.NoNodeException ke) { } catch(KeeperException.NoNodeException ke) {
LOG.debug(zkw.prefix("Unable to list children of znode " + znode + " " + LOG.debug(zkw.prefix("Unable to list children of znode " + znode + " " +
"because node does not exist (not an error)")); "because node does not exist (not an error)"));
return null;
} catch (KeeperException e) { } catch (KeeperException e) {
LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e); LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e);
zkw.keeperException(e); zkw.keeperException(e);
return null;
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e); LOG.warn(zkw.prefix("Unable to list children of znode " + znode + " "), e);
zkw.interruptedException(e); zkw.interruptedException(e);
return null;
} }
return null;
} }
/** /**