HBASE-23281: Track meta region locations in masters (#830)

* HBASE-23281: Track meta region changes on masters

This patch adds a simple cache that tracks the meta region replica
locations. It keeps an eye on the region movements so that the
cached locations are not stale.

This information is used for servicing client RPCs for connections
that use master based registry (HBASE-18095). The RPC end points
will be added in a separate patch.

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
Signed-off-by: Andrew Purtell <apurtell@apache.org>
(cherry picked from commit 8571d389cf)
(cherry picked from commit 89581d9d21)
This commit is contained in:
Bharath Vissapragada 2019-12-04 15:26:58 -08:00
parent e4161e5875
commit 9a1d5a02b0
No known key found for this signature in database
GPG Key ID: 18AE42A0B5A93FA7
7 changed files with 728 additions and 34 deletions

View File

@ -86,6 +86,7 @@ import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.PackagePrivateFieldAccessor;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
@ -95,6 +96,7 @@ import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.io.LimitInputStream;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.generated.AccessControlProtos;
import org.apache.hadoop.hbase.protobuf.generated.AccessControlProtos.AccessControlService;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
@ -158,6 +160,7 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor.Flus
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor.EventType;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.quotas.QuotaScope;
import org.apache.hadoop.hbase.quotas.QuotaType;
import org.apache.hadoop.hbase.quotas.ThrottleType;
@ -170,6 +173,7 @@ import org.apache.hadoop.hbase.security.access.UserPermission;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenIdentifier;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.DynamicClassLoader;
@ -3657,4 +3661,86 @@ public final class ProtobufUtil {
}
return Collections.emptySet();
}
/**
* Get the Meta region state from the passed data bytes. Can handle both old and new style
* server names.
* @param data protobuf serialized data with meta server name.
* @param replicaId replica ID for this region
* @return RegionState instance corresponding to the serialized data.
* @throws DeserializationException if the data is invalid.
*/
public static RegionState parseMetaRegionStateFrom(final byte[] data, int replicaId)
throws DeserializationException {
RegionState.State state = RegionState.State.OPEN;
ServerName serverName;
if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
try {
int prefixLen = ProtobufUtil.lengthOfPBMagic();
ZooKeeperProtos.MetaRegionServer rl =
ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom(data, prefixLen,
data.length - prefixLen);
if (rl.hasState()) {
state = RegionState.State.convert(rl.getState());
}
HBaseProtos.ServerName sn = rl.getServer();
serverName = ServerName.valueOf(
sn.getHostName(), sn.getPort(), sn.getStartCode());
} catch (InvalidProtocolBufferException e) {
throw new DeserializationException("Unable to parse meta region location");
}
} else {
// old style of meta region location?
serverName = parseServerNameFrom(data);
}
if (serverName == null) {
state = RegionState.State.OFFLINE;
}
return new RegionState(RegionReplicaUtil.getRegionInfoForReplica(
HRegionInfo.FIRST_META_REGIONINFO, replicaId), state, serverName);
}
/**
* Get a ServerName from the passed in data bytes.
* @param data Data with a serialize server name in it; can handle the old style
* servername where servername was host and port. Works too with data that
* begins w/ the pb 'PBUF' magic and that is then followed by a protobuf that
* has a serialized {@link ServerName} in it.
* @return Returns null if <code>data</code> is null else converts passed data
* to a ServerName instance.
* @throws DeserializationException
*/
public static ServerName parseServerNameFrom(final byte [] data) throws DeserializationException {
if (data == null || data.length <= 0) return null;
if (isPBMagicPrefix(data)) {
int prefixLen = lengthOfPBMagic();
try {
ZooKeeperProtos.Master rss =
ZooKeeperProtos.Master.PARSER.parseFrom(data, prefixLen, data.length - prefixLen);
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName sn =
rss.getMaster();
return ServerName.valueOf(sn.getHostName(), sn.getPort(), sn.getStartCode());
} catch (/*InvalidProtocolBufferException*/IOException e) {
// A failed parse of the znode is pretty catastrophic. Rather than loop
// retrying hoping the bad bytes will changes, and rather than change
// the signature on this method to add an IOE which will send ripples all
// over the code base, throw a RuntimeException. This should "never" happen.
// Fail fast if it does.
throw new DeserializationException(e);
}
}
// The str returned could be old style -- pre hbase-1502 -- which was
// hostname and port seperated by a colon rather than hostname, port and
// startcode delimited by a ','.
String str = Bytes.toString(data);
int index = str.indexOf(ServerName.SERVERNAME_SEPARATOR);
if (index != -1) {
// Presume its ServerName serialized with versioned bytes.
return ServerName.parseVersionedServerName(data);
}
// Presume it a hostname:port format.
String hostname = Addressing.parseHostname(str);
int port = Addressing.parsePort(str);
return ServerName.valueOf(hostname, port, -1L);
}
}

View File

@ -1,4 +1,4 @@
/**
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@ -18,6 +18,11 @@
*/
package org.apache.hadoop.hbase.zookeeper;
import static org.apache.hadoop.hbase.HConstants.DEFAULT_META_REPLICA_NUM;
import static org.apache.hadoop.hbase.HConstants.META_REPLICAS_NUM;
import static org.apache.hadoop.hbase.HRegionInfo.DEFAULT_REPLICA_ID;
import static org.apache.hadoop.hbase.zookeeper.ZKUtil.joinZNode;
import com.google.common.collect.ImmutableMap;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
@ -39,7 +44,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.AuthUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.security.Superusers;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@ -71,6 +75,9 @@ import org.apache.zookeeper.data.Stat;
public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);
public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver";
public static final String META_ZNODE_PREFIX = "meta-region-server";
// Identifier for this watcher (for logging only). It is made of the prefix
// passed on construction and the zookeeper sessionid.
private String prefix;
@ -91,6 +98,11 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
private final List<ZooKeeperListener> listeners =
new CopyOnWriteArrayList<ZooKeeperListener>();
/**
* znodes containing the locations of the servers hosting the meta replicas
*/
private final ImmutableMap<Integer, String> metaReplicaZNodes;
// Single threaded executor pool that processes event notifications from Zookeeper. Events are
// processed in the order in which they arrive (pool backed by an unbounded fifo queue). We do
// this to decouple the event processing from Zookeeper's ClientCnxn's EventThread context.
@ -148,6 +160,13 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
// znode of indicating master maintenance mode
public static String masterMaintZNode = "masterMaintenance";
/**
* The prefix of meta znode. Does not include baseZNode.
* Its a 'prefix' because meta replica id integer can be tagged on the end (if
* no number present, it is 'default' replica).
*/
private final String metaZNodePrefix;
// Certain ZooKeeper nodes need to be world-readable
public static final ArrayList<ACL> CREATOR_ALL_AND_WORLD_READABLE =
new ArrayList<ACL>() { {
@ -155,7 +174,6 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
add(new ACL(ZooDefs.Perms.ALL,ZooDefs.Ids.AUTH_IDS));
}};
public final static String META_ZNODE_PREFIX = "meta-region-server";
private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup";
private final Configuration conf;
@ -202,6 +220,15 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
PendingWatcher pendingWatcher = new PendingWatcher();
this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, pendingWatcher, identifier);
pendingWatcher.prepare(this);
ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
String defaultMetaReplicaZNode = joinZNode(baseZNode, metaZNodePrefix);
builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode);
int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM);
for (int i = 1; i < numMetaReplicas; i++) {
builder.put(i, defaultMetaReplicaZNode + "-" + i);
}
metaReplicaZNodes = builder.build();
if (canCreateBaseZNode) {
try {
createBaseZNodes();
@ -219,6 +246,13 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
HConstants.ZK_SYNC_BLOCKING_TIMEOUT_DEFAULT_MS);
}
/**
* @return true if the znode is a meta region replica
*/
public boolean isAnyMetaReplicaZNode(String node) {
return this.metaReplicaZNodes.containsValue(node);
}
private void createBaseZNodes() throws ZooKeeperConnectionException {
try {
// Create all the necessary "directories" of znodes
@ -296,7 +330,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
List<String> children = recoverableZooKeeper.getChildren(znode, false);
for (String child : children) {
setZnodeAclsRecursive(ZKUtil.joinZNode(znode, child));
setZnodeAclsRecursive(joinZNode(znode, child));
}
List<ACL> acls = ZKUtil.createACL(this, znode, true);
LOG.info("Setting ACLs for znode:" + znode + " , acl:" + acls);
@ -446,47 +480,47 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
private void setNodeNames(Configuration conf) {
baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
metaReplicaZnodes.put(0, ZKUtil.joinZNode(baseZNode,
metaReplicaZnodes.put(0, joinZNode(baseZNode,
conf.get("zookeeper.znode.metaserver", "meta-region-server")));
int numMetaReplicas = conf.getInt(HConstants.META_REPLICAS_NUM,
HConstants.DEFAULT_META_REPLICA_NUM);
int numMetaReplicas = conf.getInt(META_REPLICAS_NUM,
DEFAULT_META_REPLICA_NUM);
for (int i = 1; i < numMetaReplicas; i++) {
String str = ZKUtil.joinZNode(baseZNode,
String str = joinZNode(baseZNode,
conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i);
metaReplicaZnodes.put(i, str);
}
rsZNode = ZKUtil.joinZNode(baseZNode,
rsZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.rs", "rs"));
drainingZNode = ZKUtil.joinZNode(baseZNode,
drainingZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.draining.rs", "draining"));
masterAddressZNode = ZKUtil.joinZNode(baseZNode,
masterAddressZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.master", "master"));
backupMasterAddressesZNode = ZKUtil.joinZNode(baseZNode,
backupMasterAddressesZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.backup.masters", "backup-masters"));
clusterStateZNode = ZKUtil.joinZNode(baseZNode,
clusterStateZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.state", "running"));
assignmentZNode = ZKUtil.joinZNode(baseZNode,
assignmentZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.unassigned", "region-in-transition"));
tableZNode = ZKUtil.joinZNode(baseZNode,
tableZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.tableEnableDisable", "table"));
clusterIdZNode = ZKUtil.joinZNode(baseZNode,
clusterIdZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.clusterId", "hbaseid"));
splitLogZNode = ZKUtil.joinZNode(baseZNode,
splitLogZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME));
balancerZNode = ZKUtil.joinZNode(baseZNode,
balancerZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.balancer", "balancer"));
regionNormalizerZNode = ZKUtil.joinZNode(baseZNode,
regionNormalizerZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.regionNormalizer", "normalizer"));
switchZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.switch", "switch"));
tableLockZNode = ZKUtil.joinZNode(baseZNode,
switchZNode = joinZNode(baseZNode, conf.get("zookeeper.znode.switch", "switch"));
tableLockZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.tableLock", "table-lock"));
snapshotCleanupZNode = ZKUtil.joinZNode(baseZNode,
snapshotCleanupZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.snapshot.cleanup", DEFAULT_SNAPSHOT_CLEANUP_ZNODE));
recoveringRegionsZNode = ZKUtil.joinZNode(baseZNode,
recoveringRegionsZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
namespaceZNode = ZKUtil.joinZNode(baseZNode,
namespaceZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.namespace", "namespace"));
masterMaintZNode = ZKUtil.joinZNode(baseZNode,
masterMaintZNode = joinZNode(baseZNode,
conf.get("zookeeper.znode.masterMaintenance", "master-maintenance"));
}
@ -508,7 +542,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
* @return true or false
*/
public boolean isDefaultMetaReplicaZnode(String node) {
if (getZNodeForReplica(HRegionInfo.DEFAULT_REPLICA_ID).equals(node)) {
if (getZNodeForReplica(DEFAULT_REPLICA_ID).equals(node)) {
return true;
}
return false;
@ -542,7 +576,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
// This is mostly needed for tests that attempt to create meta replicas
// from outside the master
if (str == null) {
str = ZKUtil.joinZNode(baseZNode,
str = joinZNode(baseZNode,
conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + replicaId);
}
return str;
@ -555,7 +589,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
*/
public int getMetaReplicaIdFromZnode(String znode) {
String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server");
if (znode.equals(pattern)) return HRegionInfo.DEFAULT_REPLICA_ID;
if (znode.equals(pattern)) return DEFAULT_REPLICA_ID;
// the non-default replicas are of the pattern meta-region-server-<replicaId>
String nonDefaultPattern = pattern + "-";
return Integer.parseInt(znode.substring(nonDefaultPattern.length()));
@ -868,4 +902,45 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
public String getSwitchZNode() {
return switchZNode;
}
/**
* Parses the meta replicaId from the passed path.
* @param path the name of the full path which includes baseZNode.
* @return replicaId
*/
public int getMetaReplicaIdFromPath(String path) {
// Extract the znode from path. The prefix is of the following format.
// baseZNode + PATH_SEPARATOR.
int prefixLen = baseZNode.length() + 1;
return getMetaReplicaIdFromZnode(path.substring(prefixLen));
}
/**
* Same as {@link #getMetaReplicaNodes()} except that this also registers a watcher on base znode
* for subsequent CREATE/DELETE operations on child nodes.
*/
public List<String> getMetaReplicaNodesAndWatchChildren() throws KeeperException {
List<String> childrenOfBaseNode =
ZKUtil.listChildrenAndWatchForNewChildren(this, baseZNode);
return filterMetaReplicaNodes(childrenOfBaseNode);
}
/**
* @param nodes Input list of znodes
* @return Filtered list of znodes from nodes that belong to meta replica(s).
*/
private List<String> filterMetaReplicaNodes(List<String> nodes) {
if (nodes == null || nodes.isEmpty()) {
return new ArrayList<>();
}
List<String> metaReplicaNodes = new ArrayList<>(2);
String pattern = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
for (String child : nodes) {
if (child.startsWith(pattern)) {
metaReplicaNodes.add(child);
}
}
return metaReplicaNodes;
}
}

View File

@ -1,5 +1,4 @@
/**
*
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -307,6 +306,12 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
private RegionsRecoveryConfigManager regionsRecoveryConfigManager = null;
/**
* Cache for the meta region replica's locations. Also tracks their changes to avoid stale
* cache entries.
*/
private final MetaRegionLocationCache metaRegionLocationCache;
// buffer for "fatal error" notices from region servers
// in the cluster. This is only used for assisting
// operations/debugging.
@ -517,11 +522,13 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// Some unit tests don't need a cluster, so no zookeeper at all
if (!conf.getBoolean("hbase.testing.nocluster", false)) {
this.metaRegionLocationCache = new MetaRegionLocationCache(this.zooKeeper);
setInitLatch(new CountDownLatch(1));
activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
int infoPort = putUpJettyServer();
startActiveMasterManager(infoPort);
} else {
this.metaRegionLocationCache = null;
activeMasterManager = null;
}
cachedClusterId = new CachedClusterId(conf);
@ -3444,4 +3451,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
}
return cachedClusterId.getFromCacheOrFetch();
}
public MetaRegionLocationCache getMetaRegionLocationCache() {
return this.metaRegionLocationCache;
}
}

View File

@ -0,0 +1,256 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import static org.apache.hadoop.hbase.zookeeper.ZKUtil.joinZNode;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.concurrent.ThreadFactory;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.types.CopyOnWriteArrayMap;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A cache of meta region location metadata. Registers a listener on ZK to track changes to the
* meta table znodes. Clients are expected to retry if the meta information is stale. This class
* is thread-safe (a single instance of this class can be shared by multiple threads without race
* conditions).
*/
@InterfaceAudience.Private
public class MetaRegionLocationCache extends ZooKeeperListener {
private static final Logger LOG = LoggerFactory.getLogger(MetaRegionLocationCache.class);
/**
* Maximum number of times we retry when ZK operation times out.
*/
private static final int MAX_ZK_META_FETCH_RETRIES = 10;
/**
* Sleep interval ms between ZK operation retries.
*/
private static final int SLEEP_INTERVAL_MS_BETWEEN_RETRIES = 1000;
private static final int SLEEP_INTERVAL_MS_MAX = 10000;
private final RetryCounterFactory retryCounterFactory =
new RetryCounterFactory(MAX_ZK_META_FETCH_RETRIES, SLEEP_INTERVAL_MS_BETWEEN_RETRIES);
/**
* Cached meta region locations indexed by replica ID.
* CopyOnWriteArrayMap ensures synchronization during updates and a consistent snapshot during
* client requests. Even though CopyOnWriteArrayMap copies the data structure for every write,
* that should be OK since the size of the list is often small and mutations are not too often
* and we do not need to block client requests while mutations are in progress.
*/
private final CopyOnWriteArrayMap<Integer, HRegionLocation> cachedMetaLocations;
private enum ZNodeOpType {
INIT,
CREATED,
CHANGED,
DELETED
}
public MetaRegionLocationCache(ZooKeeperWatcher zkWatcher) {
super(zkWatcher);
cachedMetaLocations = new CopyOnWriteArrayMap<>();
watcher.registerListener(this);
// Populate the initial snapshot of data from meta znodes.
// This is needed because stand-by masters can potentially start after the initial znode
// creation. It blocks forever until the initial meta locations are loaded from ZK and watchers
// are established. Subsequent updates are handled by the registered listener. Also, this runs
// in a separate thread in the background to not block master init.
ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).build();
final RetryCounterFactory retryFactory = new RetryCounterFactory(
Integer.MAX_VALUE, SLEEP_INTERVAL_MS_BETWEEN_RETRIES, SLEEP_INTERVAL_MS_MAX);
threadFactory.newThread(
new Runnable() {
@Override
public void run() {
MetaRegionLocationCache.this.loadMetaLocationsFromZk(
retryFactory.create(), ZNodeOpType.INIT);
}
}).start();
}
/**
* Populates the current snapshot of meta locations from ZK. If no meta znodes exist, it registers
* a watcher on base znode to check for any CREATE/DELETE events on the children.
* @param retryCounter controls the number of retries and sleep between retries.
*/
private void loadMetaLocationsFromZk(RetryCounter retryCounter, ZNodeOpType opType) {
List<String> znodes = null;
while (retryCounter.shouldRetry()) {
try {
znodes = watcher.getMetaReplicaNodesAndWatchChildren();
break;
} catch (KeeperException ke) {
LOG.debug("Error populating initial meta locations", ke);
if (!retryCounter.shouldRetry()) {
// Retries exhausted and watchers not set. This is not a desirable state since the cache
// could remain stale forever. Propagate the exception.
watcher.abort("Error populating meta locations", ke);
return;
}
try {
retryCounter.sleepUntilNextRetry();
} catch (InterruptedException ie) {
LOG.error("Interrupted while loading meta locations from ZK", ie);
Thread.currentThread().interrupt();
return;
}
}
}
if (znodes == null || znodes.isEmpty()) {
// No meta znodes exist at this point but we registered a watcher on the base znode to listen
// for updates. They will be handled via nodeChildrenChanged().
return;
}
if (znodes.size() == cachedMetaLocations.size()) {
// No new meta znodes got added.
return;
}
for (String znode: znodes) {
String path = joinZNode(watcher.baseZNode, znode);
updateMetaLocation(path, opType);
}
}
/**
* Gets the HRegionLocation for a given meta replica ID. Renews the watch on the znode for
* future updates.
* @param replicaId ReplicaID of the region.
* @return HRegionLocation for the meta replica.
* @throws KeeperException if there is any issue fetching/parsing the serialized data.
*/
private HRegionLocation getMetaRegionLocation(int replicaId)
throws KeeperException {
RegionState metaRegionState;
try {
byte[] data = ZKUtil.getDataAndWatch(watcher,
watcher.getZNodeForReplica(replicaId));
metaRegionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId);
} catch (DeserializationException e) {
throw ZKUtil.convert(e);
}
return new HRegionLocation(metaRegionState.getRegion(), metaRegionState.getServerName());
}
private void updateMetaLocation(String path, ZNodeOpType opType) {
if (!isValidMetaZNode(path)) {
return;
}
LOG.debug("Updating meta znode for path {}: {}", path, opType.name());
int replicaId = watcher.getMetaReplicaIdFromPath(path);
RetryCounter retryCounter = retryCounterFactory.create();
HRegionLocation location = null;
while (retryCounter.shouldRetry()) {
try {
if (opType == ZNodeOpType.DELETED) {
if (!ZKUtil.watchAndCheckExists(watcher, path)) {
// The path does not exist, we've set the watcher and we can break for now.
break;
}
// If it is a transient error and the node appears right away, we fetch the
// latest meta state.
}
location = getMetaRegionLocation(replicaId);
break;
} catch (KeeperException e) {
LOG.debug("Error getting meta location for path {}", path, e);
if (!retryCounter.shouldRetry()) {
LOG.warn("Error getting meta location for path {}. Retries exhausted.", path, e);
break;
}
try {
retryCounter.sleepUntilNextRetry();
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
return;
}
}
}
if (location == null) {
cachedMetaLocations.remove(replicaId);
return;
}
cachedMetaLocations.put(replicaId, location);
}
/**
* @return Optional list of HRegionLocations for meta replica(s), null if the cache is empty.
*
*/
public List<HRegionLocation> getMetaRegionLocations() {
ConcurrentNavigableMap<Integer, HRegionLocation> snapshot =
cachedMetaLocations.tailMap(cachedMetaLocations.firstKey());
List<HRegionLocation> result = new ArrayList<>();
if (snapshot.isEmpty()) {
// This could be possible if the master has not successfully initialized yet or meta region
// is stuck in some weird state.
return result;
}
// Explicitly iterate instead of new ArrayList<>(snapshot.values()) because the underlying
// ArrayValueCollection does not implement toArray().
for (HRegionLocation location: snapshot.values()) {
result.add(location);
}
return result;
}
/**
* Helper to check if the given 'path' corresponds to a meta znode. This listener is only
* interested in changes to meta znodes.
*/
private boolean isValidMetaZNode(String path) {
return watcher.isAnyMetaReplicaZNode(path);
}
@Override
public void nodeCreated(String path) {
updateMetaLocation(path, ZNodeOpType.CREATED);
}
@Override
public void nodeDeleted(String path) {
updateMetaLocation(path, ZNodeOpType.DELETED);
}
@Override
public void nodeDataChanged(String path) {
updateMetaLocation(path, ZNodeOpType.CHANGED);
}
@Override
public void nodeChildrenChanged(String path) {
if (!path.equals(watcher.baseZNode)) {
return;
}
loadMetaLocationsFromZk(retryCounterFactory.create(), ZNodeOpType.CHANGED);
}
}

View File

@ -0,0 +1,237 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import com.google.common.base.Preconditions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MultithreadedTestUtil;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MetaRegionLocationCache;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({SmallTests.class, MasterTests.class })
public class TestMetaRegionLocationCache {
private static final Log LOG = LogFactory.getLog(TestMetaRegionLocationCache.class.getName());
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static Registry REGISTRY;
// waits for all replicas to have region location
static void waitUntilAllMetaReplicasHavingRegionLocation(Configuration conf,
final Registry registry, final int regionReplication) throws IOException {
Waiter.waitFor(conf, conf.getLong(
"hbase.client.sync.wait.timeout.msec", 60000), 200, true,
new Waiter.ExplainingPredicate<IOException>() {
@Override
public String explainFailure() throws IOException {
return "Not all meta replicas get assigned";
}
@Override
public boolean evaluate() throws IOException {
try {
RegionLocations locs = registry.getMetaRegionLocation();
if (locs == null || locs.size() < regionReplication) {
return false;
}
for (int i = 0; i < regionReplication; i++) {
if (locs.getRegionLocation(i) == null) {
return false;
}
}
return true;
} catch (Exception e) {
LOG.warn("Failed to get meta region locations", e);
return false;
}
}
});
}
@BeforeClass
public static void setUp() throws Exception {
TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3);
TEST_UTIL.startMiniCluster(3);
REGISTRY = RegistryFactory.getRegistry(TEST_UTIL.getConnection());
waitUntilAllMetaReplicasHavingRegionLocation(
TEST_UTIL.getConfiguration(), REGISTRY, 3);
TEST_UTIL.getConnection().getAdmin().setBalancerRunning(false, true);
}
@AfterClass
public static void cleanUp() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
private List<HRegionLocation> getCurrentMetaLocations(ZooKeeperWatcher zk) throws Exception {
List<HRegionLocation> result = new ArrayList<>();
for (String znode: zk.getMetaReplicaNodes()) {
String path = ZKUtil.joinZNode(zk.baseZNode, znode);
int replicaId = zk.getMetaReplicaIdFromPath(path);
RegionState state = MetaTableLocator.getMetaRegionState(zk, replicaId);
result.add(new HRegionLocation(state.getRegion(), state.getServerName()));
}
return result;
}
// Verifies that the cached meta locations in the given master are in sync with what is in ZK.
private void verifyCachedMetaLocations(final HMaster master) throws Exception {
// Wait until initial meta locations are loaded.
ZooKeeperWatcher zk = master.getZooKeeper();
final List<String> metaZnodes = zk.getMetaReplicaNodes();
assertEquals(3, metaZnodes.size());
TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return master.getMetaRegionLocationCache().getMetaRegionLocations().size()
== metaZnodes.size();
}
});
List<HRegionLocation> metaHRLs = master.getMetaRegionLocationCache().getMetaRegionLocations();
List<HRegionLocation> actualHRLs = getCurrentMetaLocations(zk);
Collections.sort(metaHRLs);
Collections.sort(actualHRLs);
assertEquals(actualHRLs, metaHRLs);
}
@Test public void testInitialMetaLocations() throws Exception {
verifyCachedMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster());
}
@Test public void testStandByMetaLocations() throws Exception {
HMaster standBy = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
verifyCachedMetaLocations(standBy);
}
private static ServerName getOtherRS(List<ServerName> allServers, ServerName except) {
Preconditions.checkArgument(allServers.size() > 0);
allServers.remove(except);
ServerName ret;
try {
Collections.shuffle(allServers);
ret = allServers.get(0);
} finally {
allServers.add(except);
}
return ret;
}
/*
* Shuffles the meta region replicas around the cluster and makes sure the cache is not stale.
*/
@Test public void testMetaLocationsChange() throws Exception {
List<HRegionLocation> currentMetaLocs =
getCurrentMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster().getZooKeeper());
List<ServerName> allServers = new ArrayList<>();
for (JVMClusterUtil.RegionServerThread rs:
TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads()) {
allServers.add(rs.getRegionServer().getServerName());
}
// Move these replicas to random servers.
for (HRegionLocation location: currentMetaLocs) {
TEST_UTIL.moveRegionAndWait(
location.getRegionInfo(), getOtherRS(allServers, location.getServerName()));
}
waitUntilAllMetaReplicasHavingRegionLocation(
TEST_UTIL.getConfiguration(), REGISTRY, 3);
for (JVMClusterUtil.MasterThread masterThread:
TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) {
verifyCachedMetaLocations(masterThread.getMaster());
}
}
/**
* Tests MetaRegionLocationCache's init procedure to make sure that it correctly watches the base
* znode for notifications.
*/
@Test public void testMetaRegionLocationCache() throws Exception {
final String parentZnodeName = "/randomznodename";
Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parentZnodeName);
ServerName sn = ServerName.valueOf("localhost", 1234, 5678);
try (ZooKeeperWatcher zkWatcher = new ZooKeeperWatcher(conf, null, null, true)) {
// A thread that repeatedly creates and drops an unrelated child znode. This is to simulate
// some ZK activity in the background.
MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(conf);
ctx.addThread(new MultithreadedTestUtil.RepeatingTestThread(ctx) {
@Override public void doAnAction() throws Exception {
final String testZnode = parentZnodeName + "/child";
ZKUtil.createNodeIfNotExistsAndWatch(zkWatcher, testZnode, testZnode.getBytes());
ZKUtil.deleteNode(zkWatcher, testZnode);
}
});
ctx.startThreads();
try {
MetaRegionLocationCache metaCache = new MetaRegionLocationCache(zkWatcher);
// meta znodes do not exist at this point, cache should be empty.
assertTrue(metaCache.getMetaRegionLocations().isEmpty());
// Set the meta locations for a random meta replicas, simulating an active hmaster meta
// assignment.
for (int i = 0; i < 3; i++) {
// Updates the meta znodes.
MetaTableLocator.setMetaLocation(zkWatcher, sn, i, RegionState.State.OPEN);
}
// Wait until the meta cache is populated.
int iters = 0;
while (iters++ < 10) {
if (metaCache.getMetaRegionLocations().size() == 3) {
break;
}
Thread.sleep(1000);
}
List<HRegionLocation> metaLocations = metaCache.getMetaRegionLocations();
assertNotNull(metaLocations);
assertEquals(3, metaLocations.size());
for (HRegionLocation location : metaLocations) {
assertEquals(sn, location.getServerName());
}
} finally {
// clean up.
ctx.stop();
ZKUtil.deleteChildrenRecursively(zkWatcher, parentZnodeName);
}
}
}
}

View File

@ -122,5 +122,4 @@ public class TestRegionsRecoveryConfigManager {
}
}
}
}

View File

@ -19,19 +19,22 @@
package org.apache.hadoop.hbase.protobuf;
import static org.junit.Assert.assertEquals;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Column;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto;
@ -45,8 +48,8 @@ import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
import com.google.protobuf.ByteString;
/**
* Class to test ProtobufUtil.
@ -350,4 +353,31 @@ public class TestProtobufUtil {
ProtobufUtil.toScan(expectedProto));
assertEquals(expectedProto, actualProto);
}
@Test
public void testMetaRegionState() throws Exception {
ServerName serverName = ServerName.valueOf("localhost", 1234, 5678);
// New region state style.
for (RegionState.State state: RegionState.State.values()) {
RegionState regionState =
new RegionState(HRegionInfo.FIRST_META_REGIONINFO, state, serverName);
MetaRegionServer metars = MetaRegionServer.newBuilder()
.setServer(org.apache.hadoop.hbase.protobuf.ProtobufUtil.toServerName(serverName))
.setRpcVersion(HConstants.RPC_CURRENT_VERSION)
.setState(state.convert()).build();
// Serialize
byte[] data = ProtobufUtil.prependPBMagic(metars.toByteArray());
ProtobufUtil.prependPBMagic(data);
// Deserialize
RegionState regionStateNew = ProtobufUtil.parseMetaRegionStateFrom(data, 1);
assertEquals(regionState.getServerName(), regionStateNew.getServerName());
assertEquals(regionState.getState(), regionStateNew.getState());
}
// old style.
RegionState rs =
org.apache.hadoop.hbase.protobuf.ProtobufUtil.parseMetaRegionStateFrom(
serverName.getVersionedBytes(), 1);
assertEquals(serverName, rs.getServerName());
assertEquals(rs.getState(), RegionState.State.OPEN);
}
}