From 6b20a0f831b195c6f136a3decee61f1d80ebde29 Mon Sep 17 00:00:00 2001 From: Devaraj Das Date: Tue, 27 Jan 2015 16:40:29 -0800 Subject: [PATCH] HBASE-11574. hbase:meta's regions can be replicated --- .../org/apache/hadoop/hbase/HRegionInfo.java | 14 +- .../hadoop/hbase/MetaTableAccessor.java | 4 +- .../hbase/client/ConnectionManager.java | 10 +- .../hadoop/hbase/client/HBaseAdmin.java | 3 - .../hadoop/hbase/client/HConnectionKey.java | 1 + .../hadoop/hbase/client/MetaScanner.java | 13 +- .../hbase/client/RegionReplicaUtil.java | 10 +- .../client/ScannerCallableWithReplicas.java | 2 +- .../hbase/client/ZooKeeperRegistry.java | 31 +- .../hbase/zookeeper/MetaTableLocator.java | 201 ++++++++- .../apache/hadoop/hbase/zookeeper/ZKUtil.java | 12 +- .../hbase/zookeeper/ZooKeeperWatcher.java | 88 +++- .../org/apache/hadoop/hbase/HConstants.java | 6 + .../hbase/IntegrationTestMetaReplicas.java | 108 +++++ .../hbase/master/AssignmentManager.java | 18 +- .../apache/hadoop/hbase/master/HMaster.java | 90 +++- .../hadoop/hbase/master/RegionStateStore.java | 2 +- .../master/balancer/BaseLoadBalancer.java | 5 +- .../handler/MetaServerShutdownHandler.java | 2 +- .../master/handler/ServerShutdownHandler.java | 16 +- .../hadoop/hbase/regionserver/HRegion.java | 12 +- .../hbase/regionserver/HRegionServer.java | 18 +- .../regionserver/StorefileRefresherChore.java | 14 +- .../apache/hadoop/hbase/util/HBaseFsck.java | 152 ++++--- .../resources/hbase-webapps/master/table.jsp | 18 +- .../hbase/client/TestMetaWithReplicas.java | 393 ++++++++++++++++++ .../hbase/master/TestMasterNoCluster.java | 4 +- .../regionserver/TestDefaultMemStore.java | 36 ++ .../TestStoreFileRefresherChore.java | 2 +- 29 files changed, 1131 insertions(+), 154 deletions(-) create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestMetaReplicas.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java index 9fcf3c3607c..7a43a50d460 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.master.RegionState; @@ -219,12 +220,16 @@ public class HRegionInfo implements Comparable { * first meta regions */ private HRegionInfo(long regionId, TableName tableName) { + this(regionId, tableName, DEFAULT_REPLICA_ID); + } + + public HRegionInfo(long regionId, TableName tableName, int replicaId) { super(); this.regionId = regionId; this.tableName = tableName; - // Note: First Meta regions names are still in old format - this.regionName = createRegionName(tableName, null, - regionId, false); + this.replicaId = replicaId; + // Note: First Meta region replicas names are in old format + this.regionName = createRegionName(tableName, null, regionId, replicaId, false); setHashCode(); } @@ -914,7 +919,8 @@ public class HRegionInfo implements Comparable { TableName tableName = 
ProtobufUtil.toTableName(proto.getTableName()); if (tableName.equals(TableName.META_TABLE_NAME)) { - return FIRST_META_REGIONINFO; + return RegionReplicaUtil.getRegionInfoForReplica(FIRST_META_REGIONINFO, + proto.getReplicaId()); } long regionId = proto.getRegionId(); int replicaId = proto.hasReplicaId() ? proto.getReplicaId() : DEFAULT_REPLICA_ID; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java index 69cd543fbe9..cb031cec4fd 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java @@ -1095,7 +1095,7 @@ public class MetaTableAccessor { * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this * does not add its daughter's as different rows, but adds information about the daughters * in the same row as the parent. Use - * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int) + * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int)} * if you want to do that. * @param meta the Table for META * @param regionInfo region information @@ -1117,7 +1117,7 @@ public class MetaTableAccessor { * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this * does not add its daughter's as different rows, but adds information about the daughters * in the same row as the parent. Use - * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int) + * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int)} * if you want to do that. * @param connection connection we're using * @param regionInfo region information diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionManager.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionManager.java index 358ef3e127a..bb1fe7bf528 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionManager.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionManager.java @@ -551,6 +551,7 @@ final class ConnectionManager { static class HConnectionImplementation implements ClusterConnection, Closeable { static final Log LOG = LogFactory.getLog(HConnectionImplementation.class); private final long pause; + private final boolean useMetaReplicas; private final int numTries; final int rpcTimeout; private NonceGenerator nonceGenerator = null; @@ -674,6 +675,8 @@ final class ConnectionManager { this.closed = false; this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE); + this.useMetaReplicas = conf.getBoolean(HConstants.USE_META_REPLICAS, + HConstants.DEFAULT_USE_META_REPLICAS); this.numTries = tableConfig.getRetriesNumber(); this.rpcTimeout = conf.getInt( HConstants.HBASE_RPC_TIMEOUT_KEY, @@ -1132,7 +1135,7 @@ final class ConnectionManager { RegionLocations locations = null; if (useCache) { locations = getCachedLocation(tableName, metaCacheKey); - if (locations != null) { + if (locations != null && locations.getRegionLocation(replicaId) != null) { return locations; } } @@ -1143,7 +1146,7 @@ final class ConnectionManager { // same query while we were waiting on the lock. 
if (useCache) { locations = getCachedLocation(tableName, metaCacheKey); - if (locations != null) { + if (locations != null && locations.getRegionLocation(replicaId) != null) { return locations; } } @@ -1183,6 +1186,9 @@ final class ConnectionManager { s.setStartRow(metaKey); s.setSmall(true); s.setCaching(1); + if (this.useMetaReplicas) { + s.setConsistency(Consistency.TIMELINE); + } int localNumRetries = (retry ? numTries : 1); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java index 5ca691548ac..ec0ee439afc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java @@ -421,9 +421,6 @@ public class HBaseAdmin implements Admin { public HTableDescriptor getTableDescriptor(final TableName tableName) throws TableNotFoundException, IOException { if (tableName == null) return null; - if (tableName.equals(TableName.META_TABLE_NAME)) { - return HTableDescriptor.META_TABLEDESC; - } HTableDescriptor htd = executeCallable(new MasterCallable(getConnection()) { @Override public HTableDescriptor call(int callTimeout) throws ServiceException { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionKey.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionKey.java index 0c2018189c5..f37690ca709 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionKey.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionKey.java @@ -49,6 +49,7 @@ class HConnectionKey { HConstants.HBASE_META_SCANNER_CACHING, HConstants.HBASE_CLIENT_INSTANCE_ID, HConstants.RPC_CODEC_CONF_KEY, + HConstants.USE_META_REPLICAS, RpcControllerFactory.CUSTOM_CONTROLLER_CONF_KEY}; private Map properties; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java index f2815633121..7d91dbbda99 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetaScanner.java @@ -153,7 +153,9 @@ public final class MetaScanner { try (Table metaTable = new HTable(TableName.META_TABLE_NAME, connection, null)) { if (row != null) { // Scan starting at a particular row in a particular table - Result startRowResult = getClosestRowOrBefore(metaTable, tableName, row); + Result startRowResult = getClosestRowOrBefore(metaTable, tableName, row, + connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS, + HConstants.DEFAULT_USE_META_REPLICAS)); if (startRowResult == null) { throw new TableNotFoundException("Cannot find row in " + metaTable.getName() + " for table: " + tableName + ", row=" + Bytes.toStringBinary(row)); @@ -177,6 +179,10 @@ public final class MetaScanner { int scannerCaching = connection.getConfiguration() .getInt(HConstants.HBASE_META_SCANNER_CACHING, HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); + if (connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS, + HConstants.DEFAULT_USE_META_REPLICAS)) { + scan.setConsistency(Consistency.TIMELINE); + } if (rowUpperLimit <= scannerCaching) { scan.setSmall(true); } @@ -215,10 +221,13 @@ public final class MetaScanner { * @throws IOException */ private static Result getClosestRowOrBefore(final Table metaTable, final TableName userTableName, - final byte [] row) + 
final byte [] row, boolean useMetaReplicas) throws IOException { byte[] searchRow = HRegionInfo.createRegionName(userTableName, row, HConstants.NINES, false); Scan scan = Scan.createGetClosestRowOrBeforeReverseScan(searchRow); + if (useMetaReplicas) { + scan.setConsistency(Consistency.TIMELINE); + } try (ResultScanner resultScanner = metaTable.getScanner(scan)) { return resultScanner.next(); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java index 91d1f9b1d6c..801e1b972b2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionReplicaUtil.java @@ -47,9 +47,13 @@ public class RegionReplicaUtil { if (regionInfo.getReplicaId() == replicaId) { return regionInfo; } - HRegionInfo replicaInfo = new HRegionInfo(regionInfo.getTable(), regionInfo.getStartKey(), - regionInfo.getEndKey(), regionInfo.isSplit(), regionInfo.getRegionId(), replicaId); - + HRegionInfo replicaInfo; + if (regionInfo.isMetaRegion()) { + replicaInfo = new HRegionInfo(regionInfo.getRegionId(), regionInfo.getTable(), replicaId); + } else { + replicaInfo = new HRegionInfo(regionInfo.getTable(), regionInfo.getStartKey(), + regionInfo.getEndKey(), regionInfo.isSplit(), regionInfo.getRegionId(), replicaId); + } replicaInfo.setOffline(regionInfo.isOffline()); return replicaInfo; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java index 92293f236c9..0697840f19e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java @@ -272,7 +272,7 @@ class ScannerCallableWithReplicas implements RetryingCallable { return 0; // not scheduling on other replicas for strong consistency } for (int id = min; id <= max; id++) { - if (currentScannerCallable.getHRegionInfo().getReplicaId() == id) { + if (currentScannerCallable.id == id) { continue; //this was already scheduled earlier } ScannerCallable s = currentScannerCallable.getScannerCallableForReplica(id); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java index 11a095e542d..26aca1858fa 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -54,14 +55,32 @@ class ZooKeeperRegistry implements Registry { if (LOG.isTraceEnabled()) { LOG.trace("Looking up meta region location in ZK," + " connection=" + this); } - ServerName servername = new MetaTableLocator().blockUntilAvailable(zkw, hci.rpcTimeout); + List servers = new MetaTableLocator().blockUntilAvailable(zkw, hci.rpcTimeout, + hci.getConfiguration()); if (LOG.isTraceEnabled()) { - LOG.trace("Looked up meta region location, connection=" + this + - "; serverName=" + ((servername == null) ? 
"null" : servername)); + if (servers == null) { + LOG.trace("Looked up meta region location, connection=" + this + + "; servers = null"); + } else { + StringBuilder str = new StringBuilder(); + for (ServerName s : servers) { + str.append(s.toString()); + str.append(" "); + } + LOG.trace("Looked up meta region location, connection=" + this + + "; servers = " + str.toString()); + } } - if (servername == null) return null; - HRegionLocation loc = new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, servername, 0); - return new RegionLocations(new HRegionLocation[] {loc}); + if (servers == null) return null; + HRegionLocation[] locs = new HRegionLocation[servers.size()]; + int i = 0; + for (ServerName server : servers) { + HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, i); + if (server == null) locs[i++] = null; + else locs[i++] = new HRegionLocation(h, server, 0); + } + return new RegionLocations(locs); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return null; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java index e4c1e0b07b6..bfdf3945973 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java @@ -30,12 +30,14 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.ipc.FailedServerException; @@ -75,9 +77,6 @@ import com.google.protobuf.InvalidProtocolBufferException; public class MetaTableLocator { private static final Log LOG = LogFactory.getLog(MetaTableLocator.class); - static final byte [] META_REGION_NAME = - HRegionInfo.FIRST_META_REGIONINFO.getRegionName(); - // only needed to allow non-timeout infinite waits to stop when cluster shuts down private volatile boolean stopped = false; @@ -94,9 +93,21 @@ public class MetaTableLocator { * @return meta table regions and their locations. */ public List> getMetaRegionsAndLocations(ZooKeeperWatcher zkw) { - ServerName serverName = new MetaTableLocator().getMetaRegionLocation(zkw); + return getMetaRegionsAndLocations(zkw, HRegionInfo.DEFAULT_REPLICA_ID); + } + + /** + * + * @param zkw + * @param replicaId + * @return meta table regions and their locations. 
+ */ + public List> getMetaRegionsAndLocations(ZooKeeperWatcher zkw, + int replicaId) { + ServerName serverName = getMetaRegionLocation(zkw, replicaId); List> list = new ArrayList>(); - list.add(new Pair(HRegionInfo.FIRST_META_REGIONINFO, serverName)); + list.add(new Pair(RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, replicaId), serverName)); return list; } @@ -105,8 +116,18 @@ public class MetaTableLocator { * @return List of meta regions */ public List getMetaRegions(ZooKeeperWatcher zkw) { + return getMetaRegions(zkw, HRegionInfo.DEFAULT_REPLICA_ID); + } + + /** + * + * @param zkw + * @param replicaId + * @return List of meta regions + */ + public List getMetaRegions(ZooKeeperWatcher zkw, int replicaId) { List> result; - result = getMetaRegionsAndLocations(zkw); + result = getMetaRegionsAndLocations(zkw, replicaId); return getListOfHRegionInfos(result); } @@ -134,17 +155,52 @@ public class MetaTableLocator { } } + /** + * Gets the meta region location, if available. Does not block. + * @param zkw + * @param replicaId + * @return server name + */ + public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw, int replicaId) { + try { + RegionState state = getMetaRegionState(zkw, replicaId); + return state.isOpened() ? state.getServerName() : null; + } catch (KeeperException ke) { + return null; + } + } + /** * Gets the meta region location, if available, and waits for up to the * specified timeout if not immediately available. * Given the zookeeper notification could be delayed, we will try to * get the latest data. + * @param zkw * @param timeout maximum time to wait, in millis * @return server name for server hosting meta region formatted as per * {@link ServerName}, or null if none available * @throws InterruptedException if interrupted while waiting + * @throws NotAllMetaRegionsOnlineException */ public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, long timeout) + throws InterruptedException, NotAllMetaRegionsOnlineException { + return waitMetaRegionLocation(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout); + } + + /** + * Gets the meta region location, if available, and waits for up to the + * specified timeout if not immediately available. + * Given the zookeeper notification could be delayed, we will try to + * get the latest data. + * @param zkw + * @param replicaId + * @param timeout maximum time to wait, in millis + * @return server name for server hosting meta region formatted as per + * {@link ServerName}, or null if none available + * @throws InterruptedException + * @throws NotAllMetaRegionsOnlineException + */ + public ServerName waitMetaRegionLocation(ZooKeeperWatcher zkw, int replicaId, long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException { try { if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) { @@ -156,7 +212,7 @@ public class MetaTableLocator { } catch (KeeperException e) { throw new IllegalStateException("KeeperException while trying to check baseZNode:", e); } - ServerName sn = blockUntilAvailable(zkw, timeout); + ServerName sn = blockUntilAvailable(zkw, replicaId, timeout); if (sn == null) { throw new NotAllMetaRegionsOnlineException("Timed out; " + timeout + "ms"); @@ -193,6 +249,8 @@ public class MetaTableLocator { /** * Verify hbase:meta is deployed and accessible. + * @param hConnection + * @param zkw * @param timeout How long to wait on zk for meta address (passed through to * the internal call to {@link #getMetaServerConnection}. * @return True if the hbase:meta location is healthy. 
@@ -201,10 +259,26 @@ public class MetaTableLocator { */ public boolean verifyMetaRegionLocation(HConnection hConnection, ZooKeeperWatcher zkw, final long timeout) + throws InterruptedException, IOException { + return verifyMetaRegionLocation(hConnection, zkw, timeout, HRegionInfo.DEFAULT_REPLICA_ID); + } + + /** + * Verify hbase:meta is deployed and accessible. + * @param hConnection + * @param zkw + * @param timeout How long to wait on zk for meta address (passed through to + * @param replicaId + * @return True if the hbase:meta location is healthy. + * @throws InterruptedException + * @throws IOException + */ + public boolean verifyMetaRegionLocation(HConnection hConnection, + ZooKeeperWatcher zkw, final long timeout, int replicaId) throws InterruptedException, IOException { AdminProtos.AdminService.BlockingInterface service = null; try { - service = getMetaServerConnection(hConnection, zkw, timeout); + service = getMetaServerConnection(hConnection, zkw, timeout, replicaId); } catch (NotAllMetaRegionsOnlineException e) { // Pass } catch (ServerNotRunningYetException e) { @@ -215,7 +289,8 @@ public class MetaTableLocator { // Pass -- server name sends us to a server that is dying or already dead. } return (service != null) && verifyRegionLocation(service, - getMetaRegionLocation(zkw), META_REGION_NAME); + getMetaRegionLocation(zkw, replicaId), RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, replicaId).getRegionName()); } /** @@ -271,16 +346,19 @@ public class MetaTableLocator { * Gets a connection to the server hosting meta, as reported by ZooKeeper, * waiting up to the specified timeout for availability. *
<p>
WARNING: Does not retry. Use an {@link org.apache.hadoop.hbase.client.HTable} instead. + * @param hConnection + * @param zkw * @param timeout How long to wait on meta location + * @param replicaId * @return connection to server hosting meta * @throws InterruptedException * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException */ private AdminService.BlockingInterface getMetaServerConnection(HConnection hConnection, - ZooKeeperWatcher zkw, long timeout) + ZooKeeperWatcher zkw, long timeout, int replicaId) throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { - return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, timeout)); + return getCachedConnection(hConnection, waitMetaRegionLocation(zkw, replicaId, timeout)); } /** @@ -345,6 +423,20 @@ public class MetaTableLocator { */ public static void setMetaLocation(ZooKeeperWatcher zookeeper, ServerName serverName, RegionState.State state) throws KeeperException { + setMetaLocation(zookeeper, serverName, HRegionInfo.DEFAULT_REPLICA_ID, state); + } + + /** + * Sets the location of hbase:meta in ZooKeeper to the + * specified server address. + * @param zookeeper + * @param serverName + * @param replicaId + * @param state + * @throws KeeperException + */ + public static void setMetaLocation(ZooKeeperWatcher zookeeper, + ServerName serverName, int replicaId, RegionState.State state) throws KeeperException { LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName); // Make the MetaRegionServer pb and then get its bytes and save this as // the znode content. @@ -354,10 +446,15 @@ public class MetaTableLocator { .setState(state.convert()).build(); byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray()); try { - ZKUtil.setData(zookeeper, zookeeper.metaServerZNode, data); + ZKUtil.setData(zookeeper, zookeeper.getZNodeForReplica(replicaId), data); } catch(KeeperException.NoNodeException nne) { - LOG.debug("META region location doesn't existed, create it"); - ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) { + LOG.debug("META region location doesn't exist, create it"); + } else { + LOG.debug("META region location doesn't exist for replicaId " + replicaId + + ", create it"); + } + ZKUtil.createAndWatch(zookeeper, zookeeper.getZNodeForReplica(replicaId), data); } } @@ -365,10 +462,22 @@ public class MetaTableLocator { * Load the meta region state from the meta server ZNode. */ public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException { + return getMetaRegionState(zkw, HRegionInfo.DEFAULT_REPLICA_ID); + } + + /** + * Load the meta region state from the meta server ZNode. 
+ * @param zkw + * @param replicaId + * @return regionstate + * @throws KeeperException + */ + public static RegionState getMetaRegionState(ZooKeeperWatcher zkw, int replicaId) + throws KeeperException { RegionState.State state = RegionState.State.OPEN; ServerName serverName = null; try { - byte[] data = ZKUtil.getData(zkw, zkw.metaServerZNode); + byte[] data = ZKUtil.getData(zkw, zkw.getZNodeForReplica(replicaId)); if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { try { int prefixLen = ProtobufUtil.lengthOfPBMagic(); @@ -396,7 +505,8 @@ public class MetaTableLocator { if (serverName == null) { state = RegionState.State.OFFLINE; } - return new RegionState(HRegionInfo.FIRST_META_REGIONINFO, + return new RegionState( + RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId), state, serverName); } @@ -407,14 +517,53 @@ public class MetaTableLocator { */ public void deleteMetaLocation(ZooKeeperWatcher zookeeper) throws KeeperException { - LOG.info("Deleting hbase:meta region location in ZooKeeper"); + deleteMetaLocation(zookeeper, HRegionInfo.DEFAULT_REPLICA_ID); + } + + public void deleteMetaLocation(ZooKeeperWatcher zookeeper, int replicaId) + throws KeeperException { + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) { + LOG.info("Deleting hbase:meta region location in ZooKeeper"); + } else { + LOG.info("Deleting hbase:meta for " + replicaId + " region location in ZooKeeper"); + } try { // Just delete the node. Don't need any watches. - ZKUtil.deleteNode(zookeeper, zookeeper.metaServerZNode); + ZKUtil.deleteNode(zookeeper, zookeeper.getZNodeForReplica(replicaId)); } catch(KeeperException.NoNodeException nne) { // Has already been deleted } } + /** + * Wait until the primary meta region is available. Get the secondary + * locations as well but don't block for those. + * @param zkw + * @param timeout + * @param conf + * @return ServerName or null if we timed out. + * @throws InterruptedException + */ + public List blockUntilAvailable(final ZooKeeperWatcher zkw, + final long timeout, Configuration conf) + throws InterruptedException { + int numReplicasConfigured = 1; + try { + List metaReplicaNodes = zkw.getMetaReplicaNodes(); + numReplicasConfigured = metaReplicaNodes.size(); + } catch (KeeperException e) { + LOG.warn("Got ZK exception " + e); + } + List servers = new ArrayList(numReplicasConfigured); + ServerName server = blockUntilAvailable(zkw, timeout); + if (server == null) return null; + servers.add(server); + + for (int replicaId = 1; replicaId < numReplicasConfigured; replicaId++) { + // return all replica locations for the meta + servers.add(getMetaRegionLocation(zkw, replicaId)); + } + return servers; + } /** * Wait until the meta region is available and is not in transition. @@ -425,6 +574,20 @@ public class MetaTableLocator { */ public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, final long timeout) + throws InterruptedException { + return blockUntilAvailable(zkw, HRegionInfo.DEFAULT_REPLICA_ID, timeout); + } + + /** + * Wait until the meta region is available and is not in transition. + * @param zkw + * @param replicaId + * @param timeout + * @return ServerName or null if we timed out. 
+ * @throws InterruptedException + */ + public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw, int replicaId, + final long timeout) throws InterruptedException { if (timeout < 0) throw new IllegalArgumentException(); if (zkw == null) throw new IllegalArgumentException(); @@ -432,7 +595,7 @@ public class MetaTableLocator { ServerName sn = null; try { while (true) { - sn = getMetaRegionLocation(zkw); + sn = getMetaRegionLocation(zkw, replicaId); if (sn != null || sw.elapsedMillis() > timeout - HConstants.SOCKET_RETRY_WAIT_MS) { break; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index b5b6bd66bc8..84e7f577aa4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -40,6 +40,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -966,7 +967,7 @@ public class ZKUtil { // Certain znodes are accessed directly by the client, // so they must be readable by non-authenticated clients if ((node.equals(zkw.baseZNode) == true) || - (node.equals(zkw.metaServerZNode) == true) || + (zkw.isAnyMetaReplicaZnode(node)) || (node.equals(zkw.getMasterAddressZNode()) == true) || (node.equals(zkw.clusterIdZNode) == true) || (node.equals(zkw.rsZNode) == true) || @@ -1699,6 +1700,13 @@ public class ZKUtil { } sb.append("\nRegion server holding hbase:meta: " + new MetaTableLocator().getMetaRegionLocation(zkw)); + Configuration conf = HBaseConfiguration.create(); + int numMetaReplicas = conf.getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); + for (int i = 1; i < numMetaReplicas; i++) { + sb.append("\nRegion server holding hbase:meta, replicaId " + i + " " + + new MetaTableLocator().getMetaRegionLocation(zkw, i)); + } sb.append("\nRegion servers:"); for (String child : listChildrenNoWatch(zkw, zkw.rsZNode)) { sb.append("\n ").append(child); @@ -1887,7 +1895,7 @@ public class ZKUtil { " byte(s) of data from znode " + znode + (watcherSet? " and set watcher; ": "; data=") + (data == null? "null": data.length == 0? "empty": ( - znode.startsWith(zkw.metaServerZNode)? + znode.startsWith(ZooKeeperWatcher.META_ZNODE_PREFIX)? getServerNameOrEmptyString(data): znode.startsWith(zkw.backupMasterAddressesZNode)? 
getServerNameOrEmptyString(data): diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java index f287a0e24bc..575dcdfde1d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java @@ -22,7 +22,9 @@ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; @@ -31,6 +33,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.zookeeper.KeeperException; @@ -82,8 +85,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable { // base znode for this cluster public String baseZNode; - // znode containing location of server hosting meta region - public String metaServerZNode; + //znodes containing the locations of the servers hosting the meta replicas + private Map metaReplicaZnodes = new HashMap(); // znode containing ephemeral nodes of the regionservers public String rsZNode; // znode containing ephemeral nodes of the draining regionservers @@ -110,6 +113,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable { // znode containing namespace descriptors public static String namespaceZNode = "namespace"; + public final static String META_ZNODE_PREFIX = "meta-region-server"; private final Configuration conf; @@ -200,8 +204,15 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable { private void setNodeNames(Configuration conf) { baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); - metaServerZNode = ZKUtil.joinZNode(baseZNode, - conf.get("zookeeper.znode.metaserver", "meta-region-server")); + metaReplicaZnodes.put(0, ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server"))); + int numMetaReplicas = conf.getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); + for (int i = 1; i < numMetaReplicas; i++) { + String str = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i); + metaReplicaZnodes.put(i, str); + } rsZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.rs", "rs")); drainingZNode = ZKUtil.joinZNode(baseZNode, @@ -228,6 +239,75 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable { conf.get("zookeeper.znode.namespace", "namespace")); } + /** + * Is the znode of any meta replica + * @param node + * @return true or false + */ + public boolean isAnyMetaReplicaZnode(String node) { + if (metaReplicaZnodes.values().contains(node)) { + return true; + } + return false; + } + + /** + * Is it the default meta replica's znode + * @param node + * @return true or false + */ + public boolean isDefaultMetaReplicaZnode(String node) { + if (getZNodeForReplica(HRegionInfo.DEFAULT_REPLICA_ID).equals(node)) { + return true; + } + return false; + } + + /** + * Get the znodes corresponding to the meta replicas 
from ZK + * @return list of znodes + * @throws KeeperException + */ + public List getMetaReplicaNodes() throws KeeperException { + List childrenOfBaseNode = ZKUtil.listChildrenNoWatch(this, baseZNode); + List metaReplicaNodes = new ArrayList(2); + String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server"); + for (String child : childrenOfBaseNode) { + if (child.startsWith(pattern)) metaReplicaNodes.add(child); + } + return metaReplicaNodes; + } + + /** + * Get the znode string corresponding to a replicaId + * @param replicaId + * @return znode + */ + public String getZNodeForReplica(int replicaId) { + String str = metaReplicaZnodes.get(replicaId); + // return a newly created path but don't update the cache of paths + // This is mostly needed for tests that attempt to create meta replicas + // from outside the master + if (str == null) { + str = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + replicaId); + } + return str; + } + + /** + * Parse the meta replicaId from the passed znode + * @param znode + * @return replicaId + */ + public int getMetaReplicaIdFromZnode(String znode) { + String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server"); + if (znode.equals(pattern)) return HRegionInfo.DEFAULT_REPLICA_ID; + // the non-default replicas are of the pattern meta-region-server- + String nonDefaultPattern = pattern + "-"; + return Integer.parseInt(znode.substring(nonDefaultPattern.length())); + } + /** * Register the specified listener to receive ZooKeeper events. * @param listener diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index c2c6401a9b7..49bb7e16299 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -894,6 +894,12 @@ public final class HConstants { public static final String LOG_REPLAY_WAIT_REGION_TIMEOUT = "hbase.master.log.replay.wait.region.timeout"; + /** Conf key for enabling meta replication */ + public static final String USE_META_REPLICAS = "hbase.meta.replicas.use"; + public static final boolean DEFAULT_USE_META_REPLICAS = false; + public static final String META_REPLICAS_NUM = "hbase.meta.replica.count"; + public static final int DEFAULT_META_REPLICA_NUM = 1; + /** * The name of the configuration parameter that specifies * the number of bytes in a newly created checksum chunk. diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestMetaReplicas.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestMetaReplicas.java new file mode 100644 index 00000000000..f682ea17bac --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestMetaReplicas.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.client.TestMetaWithReplicas; +import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore; +import org.apache.hadoop.hbase.testclassification.IntegrationTests; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.hadoop.util.ToolRunner; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * An integration test that starts the cluster with three replicas for the meta + * It then creates a table, flushes the meta, kills the server holding the primary. + * After that a client issues put/get requests on the created table - the other + * replicas of the meta would be used to get the location of the region of the created + * table. + */ +@Category(IntegrationTests.class) +public class IntegrationTestMetaReplicas { + static final Log LOG = LogFactory.getLog(IntegrationTestMetaReplicas.class); + /** + * Util to get at the cluster. + */ + private static IntegrationTestingUtility util; + + @BeforeClass + public static void setUp() throws Exception { + // Set up the integration test util + if (util == null) { + util = new IntegrationTestingUtility(); + } + util.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3); + util.getConfiguration().setInt( + StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000); + // Make sure there are three servers. + util.initializeCluster(3); + ZooKeeperWatcher zkw = util.getZooKeeperWatcher(); + Configuration conf = util.getConfiguration(); + String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, + HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); + String primaryMetaZnode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server")); + // check that the data in the znode is parseable (this would also mean the znode exists) + byte[] data = ZKUtil.getData(zkw, primaryMetaZnode); + ServerName.parseFrom(data); + waitUntilZnodeAvailable(1); + waitUntilZnodeAvailable(2); + } + + @AfterClass + public static void teardown() throws Exception { + //Clean everything up. + util.restoreCluster(); + util = null; + } + + private static void waitUntilZnodeAvailable(int replicaId) throws Exception { + String znode = util.getZooKeeperWatcher().getZNodeForReplica(replicaId); + int i = 0; + while (i < 1000) { + if (ZKUtil.checkExists(util.getZooKeeperWatcher(), znode) == -1) { + Thread.sleep(100); + i++; + } else break; + } + if (i == 1000) throw new IOException("znode for meta replica " + replicaId + " not available"); + } + + @Test + public void testShutdownHandling() throws Exception { + // This test creates a table, flushes the meta (with 3 replicas), kills the + // server holding the primary meta replica. Then it does a put/get into/from + // the test table. 
The put/get operations would use the replicas to locate the + // location of the test table's region + TestMetaWithReplicas.shutdownMetaAndDoValidations(util); + } + + public static void main(String[] args) throws Exception { + setUp(); + new IntegrationTestMetaReplicas().testShutdownHandling(); + teardown(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 6abb56d5687..5bc1f2d0820 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1381,14 +1381,15 @@ public class AssignmentManager { } /** - * Assigns the hbase:meta region. + * Assigns the hbase:meta region or a replica. *
<p>
* Assumes that hbase:meta is currently closed and is not being actively served by * any RegionServer. + * @param hri TODO */ - public void assignMeta() throws KeeperException { - regionStates.updateRegionState(HRegionInfo.FIRST_META_REGIONINFO, State.OFFLINE); - assign(HRegionInfo.FIRST_META_REGIONINFO); + public void assignMeta(HRegionInfo hri) throws KeeperException { + regionStates.updateRegionState(hri, State.OFFLINE); + assign(hri); } /** @@ -1968,6 +1969,15 @@ public class AssignmentManager { return isCarryingRegion(serverName, HRegionInfo.FIRST_META_REGIONINFO); } + public boolean isCarryingMetaReplica(ServerName serverName, int replicaId) { + return isCarryingRegion(serverName, + RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, replicaId)); + } + + public boolean isCarryingMetaReplica(ServerName serverName, HRegionInfo metaHri) { + return isCarryingRegion(serverName, metaHri); + } + /** * Check if the shutdown server carries the specific region. * @return whether the serverName currently hosts the region diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index de18ec61011..809b3113925 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -57,12 +57,14 @@ import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NamespaceNotFoundException; import org.apache.hadoop.hbase.PleaseHoldException; +import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; @@ -75,6 +77,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; @@ -341,6 +344,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { LOG.info("hbase.rootdir=" + FSUtils.getRootDir(this.conf) + ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false)); + // Disable usage of meta replicas in the master + this.conf.setBoolean(HConstants.USE_META_REPLICAS, false); + Replication.decorateMasterConfiguration(this.conf); // Hack! Maps DFSClient => Master for logs. 
HDFS made this @@ -603,7 +609,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // enable table descriptors cache this.tableDescriptors.setCacheOn(); - + // set the META's descriptor to the correct replication + this.tableDescriptors.get(TableName.META_TABLE_NAME).setRegionReplication( + conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)); // warm-up HTDs cache on master initialization if (preLoadTableDescriptors) { status.setStatus("Pre-loading table descriptors"); @@ -693,7 +701,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // Make sure meta assigned before proceeding. status.setStatus("Assigning Meta Region"); - assignMeta(status, previouslyFailedMetaRSs); + assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID); // check if master is shutting down because above assignMeta could return even hbase:meta isn't // assigned when master is shutting down if(isStopped()) return; @@ -737,6 +745,15 @@ public class HMaster extends HRegionServer implements MasterServices, Server { LOG.info("Master has completed initialization"); configurationManager.registerObserver(this.balancer); initialized = true; + // assign the meta replicas + Set EMPTY_SET = new HashSet(); + int numReplicas = conf.getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); + for (int i = 1; i < numReplicas; i++) { + assignMeta(status, EMPTY_SET, i); + } + unassignExcessMetaReplica(zooKeeper, numReplicas); + status.setStatus("Starting quota manager"); initQuotaManager(); @@ -773,47 +790,81 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return new ServerManager(master, services); } + private void unassignExcessMetaReplica(ZooKeeperWatcher zkw, int numMetaReplicasConfigured) { + // unassign the unneeded replicas (for e.g., if the previous master was configured + // with a replication of 3 and now it is 2, we need to unassign the 1 unneeded replica) + try { + List metaReplicaZnodes = zooKeeper.getMetaReplicaNodes(); + for (String metaReplicaZnode : metaReplicaZnodes) { + int replicaId = zooKeeper.getMetaReplicaIdFromZnode(metaReplicaZnode); + if (replicaId >= numMetaReplicasConfigured) { + RegionState r = MetaTableLocator.getMetaRegionState(zkw, replicaId); + serverManager.sendRegionClose(r.getServerName(), r.getRegion()); + ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(replicaId)); + } + } + } catch (Exception ex) { + // ignore the exception since we don't want the master to be wedged due to potential + // issues in the cleanup of the extra regions. We can do that cleanup via hbck or manually + LOG.warn("Ignoring exception " + ex); + } + } + /** * Check hbase:meta is assigned. If not, assign it. * @param status MonitoredTask * @param previouslyFailedMetaRSs + * @param replicaId * @throws InterruptedException * @throws IOException * @throws KeeperException */ - void assignMeta(MonitoredTask status, Set previouslyFailedMetaRSs) + void assignMeta(MonitoredTask status, Set previouslyFailedMetaRSs, int replicaId) throws InterruptedException, IOException, KeeperException { // Work on meta region int assigned = 0; long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000); - status.setStatus("Assigning hbase:meta region"); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) { + status.setStatus("Assigning hbase:meta region"); + } else { + status.setStatus("Assigning hbase:meta region, replicaId " + replicaId); + } // Get current meta state from zk. 
- RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper()); - + RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper(), replicaId); + HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, + replicaId); RegionStates regionStates = assignmentManager.getRegionStates(); - regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO, - metaState.getState(), metaState.getServerName(), null); + regionStates.createRegionState(hri, metaState.getState(), + metaState.getServerName(), null); if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation( - this.getConnection(), this.getZooKeeper(), timeout)) { + this.getConnection(), this.getZooKeeper(), timeout, replicaId)) { ServerName currentMetaServer = metaState.getServerName(); if (serverManager.isServerOnline(currentMetaServer)) { - LOG.info("Meta was in transition on " + currentMetaServer); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) { + LOG.info("Meta was in transition on " + currentMetaServer); + } else { + LOG.info("Meta with replicaId " + replicaId + " was in transition on " + + currentMetaServer); + } assignmentManager.processRegionsInTransition(Arrays.asList(metaState)); } else { if (currentMetaServer != null) { - splitMetaLogBeforeAssignment(currentMetaServer); - regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO); - previouslyFailedMetaRSs.add(currentMetaServer); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) { + splitMetaLogBeforeAssignment(currentMetaServer); + regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO); + previouslyFailedMetaRSs.add(currentMetaServer); + } } - LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer); - assignmentManager.assignMeta(); + LOG.info("Re-assigning hbase:meta with replicaId, " + replicaId + + " it was on " + currentMetaServer); + assignmentManager.assignMeta(hri); } assigned++; } - enableMeta(TableName.META_TABLE_NAME); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableMeta(TableName.META_TABLE_NAME); if ((RecoveryMode.LOG_REPLAY == this.getMasterFileSystem().getLogRecoveryMode()) && (!previouslyFailedMetaRSs.isEmpty())) { @@ -826,10 +877,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // if the meta region server is died at this time, we need it to be re-assigned // by SSH so that system tables can be assigned. // No need to wait for meta is assigned = 0 when meta is just verified. 
- enableServerShutdownHandler(assigned != 0); - - LOG.info("hbase:meta assigned=" + assigned + ", location=" - + metaTableLocator.getMetaRegionLocation(this.getZooKeeper())); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableServerShutdownHandler(assigned != 0); + LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", location=" + + metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId)); status.setStatus("META assigned."); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java index 313b03e3ab4..9dd412ce43c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java @@ -170,7 +170,7 @@ public class RegionStateStore { // persist meta state in MetaTableLocator (which in turn is zk storage currently) try { MetaTableLocator.setMetaLocation(server.getZooKeeper(), - newState.getServerName(), newState.getState()); + newState.getServerName(), hri.getReplicaId(), newState.getState()); return; // Done } catch (KeeperException e) { throw new IOException("Failed to update meta ZNode", e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index d8dfbd0a3b9..dea0828f8e6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -857,10 +857,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer { /** * Check if a region belongs to some small system table. - * If so, it may be expected to be put on the master regionserver. + * If so, the primary replica may be expected to be put on the master regionserver. 
*/ public boolean shouldBeOnMaster(HRegionInfo region) { - return tablesOnMaster.contains(region.getTable().getNameAsString()); + return tablesOnMaster.contains(region.getTable().getNameAsString()) + && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java index 23e41d23040..629f941c007 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java @@ -150,7 +150,7 @@ public class MetaServerShutdownHandler extends ServerShutdownHandler { getLong("hbase.catalog.verification.timeout", 1000); if (!server.getMetaTableLocator().verifyMetaRegionLocation(server.getConnection(), this.server.getZooKeeper(), timeout)) { - this.services.getAssignmentManager().assignMeta(); + this.services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO); } else if (serverName.equals(server.getMetaTableLocator().getMetaRegionLocation( this.server.getZooKeeper()))) { throw new IOException("hbase:meta is onlined on the dead server " diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java index 5b7b27b3d2b..26594f799f8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; @@ -198,7 +199,17 @@ public class ServerShutdownHandler extends EventHandler { } catch (IOException ioe) { resubmit(serverName, ioe); } - + List toAssignRegions = new ArrayList(); + int replicaCount = services.getConfiguration().getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); + for (int i = 1; i < replicaCount; i++) { + HRegionInfo metaHri = + RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i); + if (am.isCarryingMetaReplica(serverName, metaHri)) { + LOG.info("Reassigning meta replica" + metaHri + " that was on " + serverName); + toAssignRegions.add(metaHri); + } + } // Clean out anything in regions in transition. Being conservative and // doing after log splitting. Could do some states before -- OPENING? // OFFLINE? -- and then others after like CLOSING that depend on log @@ -208,8 +219,7 @@ public class ServerShutdownHandler extends EventHandler { " region(s) that " + (serverName == null? 
"null": serverName) + " was carrying (and " + regionsInTransition.size() + " regions(s) that were opening on this server)"); - - List toAssignRegions = new ArrayList(); + toAssignRegions.addAll(regionsInTransition); // Iterate regions that were on this server and assign them diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 2b6f9743c0f..8e44b395703 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -1167,6 +1167,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { // "hbase.regionserver.optionalcacheflushinterval"; /** Default interval for the memstore flush */ public static final int DEFAULT_CACHE_FLUSH_INTERVAL = 3600000; + public static final int META_CACHE_FLUSH_INTERVAL = 300000; // 5 minutes /** Conf key to force a flush if there are already enough changes for one region in memstore */ public static final String MEMSTORE_FLUSH_PER_CHANGES = @@ -1799,18 +1800,23 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver { // && (this.maxFlushedSeqId + this.flushPerChanges < this.sequenceId.get())) { return true; } - if (flushCheckInterval <= 0) { //disabled + long modifiedFlushCheckInterval = flushCheckInterval; + if (getRegionInfo().isMetaRegion() && + getRegionInfo().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { + modifiedFlushCheckInterval = META_CACHE_FLUSH_INTERVAL; + } + if (modifiedFlushCheckInterval <= 0) { //disabled return false; } long now = EnvironmentEdgeManager.currentTime(); //if we flushed in the recent past, we don't need to do again now - if ((now - getEarliestFlushTimeForAllStores() < flushCheckInterval)) { + if ((now - getEarliestFlushTimeForAllStores() < modifiedFlushCheckInterval)) { return false; } //since we didn't flush in the recent past, flush now if certain conditions //are met. Return true on first such memstore hit. 
for (Store s : this.getStores().values()) { - if (s.timeOfOldestEdit() < now - flushCheckInterval) { + if (s.timeOfOldestEdit() < now - modifiedFlushCheckInterval) { // we have an old enough edit in the memstore, flush return true; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 5263a99f4fe..5d6693385ac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -470,6 +470,8 @@ public class HRegionServer extends HasThread implements checkCodecs(this.conf); this.userProvider = UserProvider.instantiate(conf); FSUtils.setupShortCircuitRead(this.conf); + // Disable usage of meta replicas in the regionserver + this.conf.setBoolean(HConstants.USE_META_REPLICAS, false); // Config'ed params this.numRetries = this.conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, @@ -788,11 +790,19 @@ public class HRegionServer extends HasThread implements rpcClient = RpcClientFactory.createClient(conf, clusterId, new InetSocketAddress( rpcServices.isa.getAddress(), 0)); + boolean onlyMetaRefresh = false; int storefileRefreshPeriod = conf.getInt( StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD); + if (storefileRefreshPeriod == 0) { + storefileRefreshPeriod = conf.getInt( + StorefileRefresherChore.REGIONSERVER_META_STOREFILE_REFRESH_PERIOD, + StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD); + onlyMetaRefresh = true; + } if (storefileRefreshPeriod > 0) { - this.storefileRefresher = new StorefileRefresherChore(storefileRefreshPeriod, this, this); + this.storefileRefresher = new StorefileRefresherChore(storefileRefreshPeriod, + onlyMetaRefresh, this, this); } registerConfigurationObservers(); } @@ -1739,7 +1749,8 @@ public class HRegionServer extends HasThread implements WAL wal; LogRoller roller = walRoller; //_ROOT_ and hbase:meta regions have separate WAL. 
- if (regionInfo != null && regionInfo.isMetaTable()) { + if (regionInfo != null && regionInfo.isMetaTable() && + regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { roller = ensureMetaWALRoller(); wal = walFactory.getMetaWAL(regionInfo.getEncodedNameAsBytes()); } else if (regionInfo == null) { @@ -1837,7 +1848,8 @@ public class HRegionServer extends HasThread implements Preconditions.checkArgument(hris != null && hris.length == 1); if (hris[0].isMetaRegion()) { try { - MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, State.OPEN); + MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, + hris[0].getReplicaId(), State.OPEN); } catch (KeeperException e) { LOG.info("Failed to update meta location", e); return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java index 8a7bd64e731..f3c7e6adb10 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StorefileRefresherChore.java @@ -53,19 +53,28 @@ public class StorefileRefresherChore extends Chore { = "hbase.regionserver.storefile.refresh.period"; static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; //disabled by default + /** + * The period (in milliseconds) for refreshing hbase:meta's store files when the general + * storefile refresh is disabled; meta region replicas don't have WAL replication enabled yet + */ + public static final String REGIONSERVER_META_STOREFILE_REFRESH_PERIOD + = "hbase.regionserver.meta.storefile.refresh.period"; private HRegionServer regionServer; private long hfileTtl; private int period; + private boolean onlyMetaRefresh = true; //ts of last time regions store files are refreshed private Map lastRefreshTimes; // encodedName -> long - public StorefileRefresherChore(int period, HRegionServer regionServer, Stoppable stoppable) { + public StorefileRefresherChore(int period, boolean onlyMetaRefresh, HRegionServer regionServer, + Stoppable stoppable) { super("StorefileRefresherChore", period, stoppable); this.period = period; this.regionServer = regionServer; this.hfileTtl = this.regionServer.getConfiguration().getLong( TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL); + this.onlyMetaRefresh = onlyMetaRefresh; if (period > hfileTtl / 2) { throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD + " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY); @@ -80,6 +89,9 @@ public class StorefileRefresherChore extends Chore { // skip checking for this region if it can accept writes continue; } + // don't refresh unless refresh is enabled for all files, or this is the meta region + // (meta region replicas don't have WAL replication enabled yet) + if (onlyMetaRefresh && !r.getRegionInfo().isMetaTable()) continue; String encodedName = r.getRegionInfo().getEncodedName(); long time = EnvironmentEdgeManager.currentTime(); if (!lastRefreshTimes.containsKey(encodedName)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 7e6ee7b0ace..8e1d848d971 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -126,6 +126,7 @@ import org.apache.hadoop.hbase.wal.WAL; import 
org.apache.hadoop.hbase.wal.WALFactory; import org.apache.hadoop.hbase.wal.WALSplitter; import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.io.IOUtils; @@ -306,6 +307,8 @@ public class HBaseFsck extends Configured implements Closeable { setConf(HBaseConfiguration.create(getConf())); // disable blockcache for tool invocation, see HBASE-10500 getConf().setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0); + // Disable usage of meta replicas in hbck + getConf().setBoolean(HConstants.USE_META_REPLICAS, false); errors = getErrorReporter(conf); int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS); @@ -1608,28 +1611,29 @@ public class HBaseFsck extends Configured implements Closeable { * Record the location of the hbase:meta region as found in ZooKeeper. */ private boolean recordMetaRegion() throws IOException { - HRegionLocation metaLocation = connection.locateRegion( - TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW); - - // Check if Meta region is valid and existing - if (metaLocation == null || metaLocation.getRegionInfo() == null || - metaLocation.getHostname() == null) { + RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME, + HConstants.EMPTY_START_ROW, false, false); + if (rl == null) { errors.reportError(ERROR_CODE.NULL_META_REGION, - "META region or some of its attributes are null."); + "META region or some of its attributes are null."); return false; } - ServerName sn; - try { - sn = getMetaRegionServerName(); - } catch (KeeperException e) { - throw new IOException(e); - } - MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis()); - HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName()); - if (hbckInfo == null) { - regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m)); - } else { - hbckInfo.metaEntry = m; + for (HRegionLocation metaLocation : rl.getRegionLocations()) { + // Check if Meta region is valid and existing + if (metaLocation == null || metaLocation.getRegionInfo() == null || + metaLocation.getHostname() == null) { + errors.reportError(ERROR_CODE.NULL_META_REGION, + "META region or some of its attributes are null."); + return false; + } + ServerName sn = metaLocation.getServerName(); + MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, System.currentTimeMillis()); + HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName()); + if (hbckInfo == null) { + regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m)); + } else { + hbckInfo.metaEntry = m; + } } return true; } @@ -1650,12 +1654,12 @@ public class HBaseFsck extends Configured implements Closeable { }); } - private ServerName getMetaRegionServerName() + private ServerName getMetaRegionServerName(int replicaId) throws IOException, KeeperException { ZooKeeperWatcher zkw = createZooKeeperWatcher(); ServerName sn = null; try { - sn = new MetaTableLocator().getMetaRegionLocation(zkw); + sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId); } finally { zkw.close(); } @@ -3027,55 +3031,83 @@ public class HBaseFsck extends Configured implements Closeable { * If there are inconsistencies (i.e. zero or more than one regions * pretend to be holding the hbase:meta) try to fix that and report an error. 
* @throws IOException from HBaseFsckRepair functions - * @throws KeeperException - * @throws InterruptedException + * @throws KeeperException + * @throws InterruptedException */ boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException { - List metaRegions = Lists.newArrayList(); + Map metaRegions = new HashMap(); for (HbckInfo value : regionInfoMap.values()) { if (value.metaEntry != null && value.metaEntry.isMetaRegion()) { - metaRegions.add(value); + metaRegions.put(value.getReplicaId(), value); } } - - // There will be always one entry in regionInfoMap corresponding to hbase:meta - // Check the deployed servers. It should be exactly one server. - List servers = new ArrayList(); - HbckInfo metaHbckInfo = null; - if (!metaRegions.isEmpty()) { - metaHbckInfo = metaRegions.get(0); - servers = metaHbckInfo.deployedOn; - } - if (servers.size() != 1) { - if (servers.size() == 0) { - errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta is not found on any region."); - if (shouldFixAssignments()) { - errors.print("Trying to fix a problem with hbase:meta.."); - setShouldRerun(); - // try to fix it (treat it as unassigned region) - HBaseFsckRepair.fixUnassigned(admin, HRegionInfo.FIRST_META_REGIONINFO); - HBaseFsckRepair.waitUntilAssigned(admin, HRegionInfo.FIRST_META_REGIONINFO); - } - } else if (servers.size() > 1) { - errors - .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta is found on more than one region."); - if (shouldFixAssignments()) { - if (metaHbckInfo == null) { - errors.print( - "Unable to fix problem with hbase:meta due to hbase:meta region info missing"); - return false; + int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME) + .getRegionReplication(); + boolean noProblem = true; + // There will always be entries in regionInfoMap corresponding to hbase:meta & its replicas + // Check the deployed servers. It should be exactly one server for each replica. + for (int i = 0; i < metaReplication; i++) { + HbckInfo metaHbckInfo = metaRegions.remove(i); + List servers = new ArrayList(); + if (metaHbckInfo != null) { + servers = metaHbckInfo.deployedOn; + } + if (servers.size() != 1) { + noProblem = false; + if (servers.size() == 0) { + assignMetaReplica(i); + } else if (servers.size() > 1) { + errors + .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " + + metaHbckInfo.getReplicaId() + " is found on more than one region."); + if (shouldFixAssignments()) { + errors.print("Trying to fix a problem with hbase:meta, replicaId " + + metaHbckInfo.getReplicaId() + ".."); + setShouldRerun(); + // try to fix it (treat it as a dupe assignment) + HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers); } - errors.print("Trying to fix a problem with hbase:meta.."); - setShouldRerun(); - // try fix it (treat is a dupe assignment) - HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers); } } - // rerun hbck with hopefully fixed META - return false; } - // no errors, so continue normally - return true; + // unassign whatever is remaining in metaRegions. They are excess replicas. + for (Map.Entry entry : metaRegions.entrySet()) { + noProblem = false; + errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED, + "hbase:meta replicas are deployed in excess. 
Configured " + metaReplication + + ", deployed " + metaRegions.size()); + if (shouldFixAssignments()) { + errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() + + " of hbase:meta.."); + setShouldRerun(); + unassignMetaReplica(entry.getValue()); + } + } + // if noProblem is false, rerun hbck with hopefully fixed META + // if noProblem is true, no errors, so continue normally + return noProblem; + } + + private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException, + KeeperException { + undeployRegions(hi); + ZooKeeperWatcher zkw = createZooKeeperWatcher(); + ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId())); + } + + private void assignMetaReplica(int replicaId) + throws IOException, KeeperException, InterruptedException { + errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " + + replicaId +" is not found on any region."); + if (shouldFixAssignments()) { + errors.print("Trying to fix a problem with hbase:meta.."); + setShouldRerun(); + // try to fix it (treat it as unassigned region) + HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, replicaId); + HBaseFsckRepair.fixUnassigned(admin, h); + HBaseFsckRepair.waitUntilAssigned(admin, h); + } } /** diff --git a/hbase-server/src/main/resources/hbase-webapps/master/table.jsp b/hbase-server/src/main/resources/hbase-webapps/master/table.jsp index c9878bf8ee0..58e5da44d70 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/table.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/table.jsp @@ -28,12 +28,14 @@ import="org.apache.hadoop.hbase.ServerName" import="org.apache.hadoop.hbase.ServerLoad" import="org.apache.hadoop.hbase.RegionLoad" + import="org.apache.hadoop.hbase.HConstants" import="org.apache.hadoop.hbase.master.HMaster" import="org.apache.hadoop.hbase.zookeeper.MetaTableLocator" import="org.apache.hadoop.hbase.util.Bytes" import="org.apache.hadoop.hbase.util.FSUtils" import="org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState" import="org.apache.hadoop.hbase.TableName" + import="org.apache.hadoop.hbase.client.RegionReplicaUtil" import="org.apache.hadoop.hbase.HBaseConfiguration" %> <% HMaster master = (HMaster)getServletContext().getAttribute(HMaster.MASTER); @@ -53,6 +55,8 @@ ServerName rl = metaTableLocator.getMetaRegionLocation(master.getZooKeeper()); boolean showFragmentation = conf.getBoolean("hbase.master.ui.fragmentation.enabled", false); boolean readOnly = conf.getBoolean("hbase.master.ui.readonly", false); + int numMetaReplicas = conf.getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); Map frags = null; if (showFragmentation) { frags = FSUtils.getTableFragmentation(master); @@ -202,11 +206,14 @@ %> <%= tableHeader %> <% - // NOTE: Presumes one meta region only. 
- HRegionInfo meta = HRegionInfo.FIRST_META_REGIONINFO; - ServerName metaLocation = metaTableLocator.waitMetaRegionLocation(master.getZooKeeper(), 1); - for (int i = 0; i < 1; i++) { - String url = "//" + metaLocation.getHostname() + ":" + master.getRegionServerInfoPort(metaLocation) + "/"; + // NOTE: Presumes meta with one or more replicas + for (int j = 0; j < numMetaReplicas; j++) { + HRegionInfo meta = RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, j); + ServerName metaLocation = metaTableLocator.waitMetaRegionLocation(master.getZooKeeper(), j, 1); + for (int i = 0; i < 1; i++) { + String url = "//" + metaLocation.getHostname() + ":" + + master.getRegionServerInfoPort(metaLocation) + "/"; %> <%= escapeXml(meta.getRegionNameAsString()) %> @@ -217,6 +224,7 @@ - <% } %> +<%} %> <%} else { Admin admin = master.getConnection().getAdmin(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java new file mode 100644 index 00000000000..83ff822a96d --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java @@ -0,0 +1,393 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; +import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; +import static org.junit.Assert.*; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.RegionLocations; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.HBaseFsck; +import org.apache.hadoop.hbase.util.HBaseFsckRepair; +import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; +import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil; +import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Tests the scenarios where replicas are enabled for the meta table + */ +@Category(MediumTests.class) +public class TestMetaWithReplicas { + static final Log LOG = LogFactory.getLog(TestMetaWithReplicas.class); + private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @Before + public void setup() throws Exception { + TEST_UTIL.getConfiguration().setInt("zookeeper.session.timeout", 30000); + TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3); + TEST_UTIL.getConfiguration().setInt( + StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000); + TEST_UTIL.startMiniCluster(3); + // disable the balancer + LoadBalancerTracker l = new LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(), + new Abortable() { + boolean aborted = false; + @Override + public boolean isAborted() { + return aborted; + } + @Override + public void abort(String why, Throwable e) { + aborted = true; + } + }); + l.setBalancerOn(false); + for (int replicaId = 1; replicaId < 3; replicaId ++) { + HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, + replicaId); + TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(h); + } + LOG.debug("All meta replicas assigned"); + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testMetaHTDReplicaCount() throws Exception { + assertTrue(TEST_UTIL.getHBaseAdmin().getTableDescriptor(TableName.META_TABLE_NAME) + .getRegionReplication() == 3); + } + + @Test + public void testZookeeperNodesForReplicas() throws Exception { + // Checks all the znodes exist when meta's replicas are enabled + ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + Configuration conf = TEST_UTIL.getConfiguration(); + String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, + 
HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); + String primaryMetaZnode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server")); + // check that the data in the znode is parseable (this would also mean the znode exists) + byte[] data = ZKUtil.getData(zkw, primaryMetaZnode); + ServerName.parseFrom(data); + for (int i = 1; i < 3; i++) { + String secZnode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i); + String str = zkw.getZNodeForReplica(i); + assertTrue(str.equals(secZnode)); + // check that the data in the znode is parseable (this would also mean the znode exists) + data = ZKUtil.getData(zkw, secZnode); + ServerName.parseFrom(data); + } + } + + @Test + public void testShutdownHandling() throws Exception { + // This test creates a table, flushes the meta (with 3 replicas), kills the + // server holding the primary meta replica. Then it does a put/get into/from + // the test table. The put/get operations would use the replicas to locate the + // location of the test table's region + shutdownMetaAndDoValidations(TEST_UTIL); + } + + public static void shutdownMetaAndDoValidations(HBaseTestingUtility util) throws Exception { + // This test creates a table, flushes the meta (with 3 replicas), kills the + // server holding the primary meta replica. Then it does a put/get into/from + // the test table. The put/get operations would use the replicas to locate the + // location of the test table's region + ZooKeeperWatcher zkw = util.getZooKeeperWatcher(); + Configuration conf = util.getConfiguration(); + conf.setBoolean(HConstants.USE_META_REPLICAS, true); + + String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, + HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); + String primaryMetaZnode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server")); + byte[] data = ZKUtil.getData(zkw, primaryMetaZnode); + ServerName primary = ServerName.parseFrom(data); + + byte[] TABLE = Bytes.toBytes("testShutdownHandling"); + byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") }; + if (util.getHBaseAdmin().tableExists(TABLE)) { + util.getHBaseAdmin().disableTable(TABLE); + util.getHBaseAdmin().deleteTable(TABLE); + } + Table htable = util.createTable(TABLE, FAMILIES, conf); + + util.getHBaseAdmin().flush(TableName.META_TABLE_NAME); + Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, + 30000) * 6); + Connection c = ConnectionFactory.createConnection(util.getConfiguration()); + List regions = MetaTableAccessor.getTableRegions(c, TableName.valueOf(TABLE)); + HRegionLocation hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0)); + // Ensure that the primary server for test table is not the same one as the primary + // of the meta region since we will be killing the srv holding the meta's primary... + // We want to be able to write to the test table even when the meta is not present .. 
+ // If the servers are the same, then move the test table's region out of the server + // to another random server + if (hrl.getServerName().equals(primary)) { + util.getHBaseAdmin().move(hrl.getRegionInfo().getEncodedNameAsBytes(), null); + // wait for the move to complete + do { + Thread.sleep(10); + hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0)); + } while (primary.equals(hrl.getServerName())); + util.getHBaseAdmin().flush(TableName.META_TABLE_NAME); + Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, + 30000) * 3); + } + ServerName master = util.getHBaseClusterInterface().getClusterStatus().getMaster(); + // kill the master so that regionserver recovery is not triggered at all + // for the meta server + util.getHBaseClusterInterface().stopMaster(master); + util.getHBaseClusterInterface().waitForMasterToStop(master, 60000); + if (!master.equals(primary)) { + util.getHBaseClusterInterface().killRegionServer(primary); + util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000); + } + ((ClusterConnection)c).clearRegionCache(); + htable.close(); + htable = c.getTable(TableName.valueOf(TABLE)); + byte[] row = "test".getBytes(); + Put put = new Put(row); + put.add("foo".getBytes(), row, row); + BufferedMutator m = c.getBufferedMutator(TableName.valueOf(TABLE)); + m.mutate(put); + m.flush(); + // Try to do a get of the row that was just put + Get get = new Get(row); + Result r = htable.get(get); + assertTrue(Arrays.equals(r.getRow(), row)); + // now start back the killed servers and disable use of replicas. That would mean + // calls go to the primary + util.getHBaseClusterInterface().startMaster(master.getHostname(), 0); + util.getHBaseClusterInterface().startRegionServer(primary.getHostname(), 0); + util.getHBaseClusterInterface().waitForActiveAndReadyMaster(); + ((ClusterConnection)c).clearRegionCache(); + htable.close(); + conf.setBoolean(HConstants.USE_META_REPLICAS, false); + htable = c.getTable(TableName.valueOf(TABLE)); + r = htable.get(get); + assertTrue(Arrays.equals(r.getRow(), row)); + } + + @Test + public void testChangingReplicaCount() throws Exception { + // tests changing the replica count across master restarts + // reduce the replica count from 3 to 2 + stopMasterAndValidateReplicaCount(3, 2); + // increase the replica count from 2 to 3 + stopMasterAndValidateReplicaCount(2, 3); + } + + private void stopMasterAndValidateReplicaCount(int originalReplicaCount, int newReplicaCount) + throws Exception { + ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster(); + TEST_UTIL.getHBaseClusterInterface().stopMaster(sn); + TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000); + List metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); + assert(metaZnodes.size() == originalReplicaCount); //we should have what was configured before + TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM, + newReplicaCount); + TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0); + TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(); + int count = 0; + do { + metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes(); + Thread.sleep(10); + count++; + // wait for the count to be different from the originalReplicaCount. 
When the + // replica count is reduced, that will happen when the master unassigns excess + // replica, and deletes the excess znodes + } while (metaZnodes.size() == originalReplicaCount && count < 1000); + assert(metaZnodes.size() == newReplicaCount); + // also check if hbck returns without errors + TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, + newReplicaCount); + HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false); + HbckTestingUtil.assertNoErrors(hbck); + } + + @Test + public void testHBaseFsckWithMetaReplicas() throws Exception { + HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false); + HbckTestingUtil.assertNoErrors(hbck); + } + + @Test + public void testHBaseFsckWithFewerMetaReplicas() throws Exception { + ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( + TEST_UTIL.getConfiguration()); + RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, + false, false); + HBaseFsckRepair.closeRegionSilentlyAndWait(c, + rl.getRegionLocation(1).getServerName(), rl.getRegionLocation(1).getRegionInfo()); + // check that problem exists + HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); + // fix the problem + hbck = doFsck(TEST_UTIL.getConfiguration(), true); + // run hbck again to make sure we don't see any errors + hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{}); + } + + @Test + public void testHBaseFsckWithFewerMetaReplicaZnodes() throws Exception { + ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection( + TEST_UTIL.getConfiguration()); + RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, + false, false); + HBaseFsckRepair.closeRegionSilentlyAndWait(c, + rl.getRegionLocation(2).getServerName(), rl.getRegionLocation(2).getRegionInfo()); + ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(2)); + // check that problem exists + HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION}); + // fix the problem + hbck = doFsck(TEST_UTIL.getConfiguration(), true); + // run hbck again to make sure we don't see any errors + hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{}); + } + + @Test + public void testAccessingUnknownTables() throws Exception { + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + conf.setBoolean(HConstants.USE_META_REPLICAS, true); + Table table = TEST_UTIL.getConnection().getTable(TableName.valueOf("RandomTable")); + Get get = new Get(Bytes.toBytes("foo")); + try { + table.get(get); + } catch (TableNotFoundException t) { + return; + } + fail("Expected TableNotFoundException"); + } + + @Test + public void testMetaAddressChange() throws Exception { + // checks that even when the meta's location changes, the various + // caches update themselves. 
Uses the master operations to test + // this + Configuration conf = TEST_UTIL.getConfiguration(); + ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, + HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); + String primaryMetaZnode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.metaserver", "meta-region-server")); + // check that the data in the znode is parseable (this would also mean the znode exists) + byte[] data = ZKUtil.getData(zkw, primaryMetaZnode); + ServerName currentServer = ServerName.parseFrom(data); + Collection liveServers = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers(); + ServerName moveToServer = null; + for (ServerName s : liveServers) { + if (!currentServer.equals(s)) { + moveToServer = s; + } + } + assert(moveToServer != null); + String tableName = "randomTable5678"; + TEST_UTIL.createTable(TableName.valueOf(tableName), "f"); + assertTrue(TEST_UTIL.getHBaseAdmin().tableExists(tableName)); + TEST_UTIL.getHBaseAdmin().move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), + Bytes.toBytes(moveToServer.getServerName())); + int i = 0; + do { + Thread.sleep(10); + data = ZKUtil.getData(zkw, primaryMetaZnode); + currentServer = ServerName.parseFrom(data); + i++; + } while (!moveToServer.equals(currentServer) && i < 1000); //wait for 10 seconds overall + assert(i != 1000); + TEST_UTIL.getHBaseAdmin().disableTable("randomTable5678"); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableDisabled("randomTable5678")); + } + + @Test + public void testShutdownOfReplicaHolder() throws Exception { + // checks that the when the server holding meta replica is shut down, the meta replica + // can be recovered + RegionLocations rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()). + locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true); + HRegionLocation hrl = rl.getRegionLocation(1); + ServerName oldServer = hrl.getServerName(); + TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer); + int i = 0; + do { + LOG.debug("Waiting for the replica " + hrl.getRegionInfo() + " to come up"); + Thread.sleep(30000); //wait for the detection/recovery + rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()). 
+ locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true); + hrl = rl.getRegionLocation(1); + i++; + } while ((hrl == null || hrl.getServerName().equals(oldServer)) && i < 3); + assertTrue(i != 3); + } + + @Test + public void testHBaseFsckWithExcessMetaReplicas() throws Exception { + HBaseFsck hbck = new HBaseFsck(TEST_UTIL.getConfiguration()); + // Create a meta replica (this will be the 4th one) and assign it + HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica( + HRegionInfo.FIRST_META_REGIONINFO, 3); + // create in-memory state otherwise master won't assign + TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager() + .getRegionStates().createRegionState(h); + TEST_UTIL.getMiniHBaseCluster().getMaster().assignRegion(h); + HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getHBaseAdmin(), h); + // check that problem exists + hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN, ERROR_CODE.SHOULD_NOT_BE_DEPLOYED}); + // fix the problem + hbck = doFsck(TEST_UTIL.getConfiguration(), true); + // run hbck again to make sure we don't see any errors + hbck = doFsck(TEST_UTIL.getConfiguration(), false); + assertErrors(hbck, new ERROR_CODE[]{}); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java index fc7f1365877..972834a39aa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java @@ -262,8 +262,8 @@ public class TestMasterNoCluster { TESTUTIL.getConfiguration()); HMaster master = new HMaster(conf, cp) { @Override - void assignMeta(MonitoredTask status, Set previouslyFailedMeatRSs) { - } + void assignMeta(MonitoredTask status, Set previouslyFailedMeatRSs, int replicaId) + { } @Override void initializeZKBasedSystemTrackers() throws IOException, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java index d6d82df6e71..6054cf57ea2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultMemStore.java @@ -33,22 +33,28 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueTestUtil; import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.TableDescriptor; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; import 
org.apache.hadoop.hbase.util.EnvironmentEdge; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.wal.WALFactory; import org.junit.experimental.categories.Category; import com.google.common.base.Joiner; @@ -928,6 +934,36 @@ public class TestDefaultMemStore extends TestCase { } } + public void testShouldFlushMeta() throws Exception { + // write an edit in the META and ensure the shouldFlush (that the periodic memstore + // flusher invokes) returns true after META_CACHE_FLUSH_INTERVAL (even though + // the MEMSTORE_PERIODIC_FLUSH_INTERVAL is set to a higher value) + Configuration conf = new Configuration(); + conf.setInt(HRegion.MEMSTORE_PERIODIC_FLUSH_INTERVAL, HRegion.META_CACHE_FLUSH_INTERVAL * 10); + HBaseTestingUtility hbaseUtility = HBaseTestingUtility.createLocalHTU(conf); + Path testDir = hbaseUtility.getDataTestDir(); + EnvironmentEdgeForMemstoreTest edge = new EnvironmentEdgeForMemstoreTest(); + EnvironmentEdgeManager.injectEdge(edge); + edge.setCurrentTimeMillis(1234); + WALFactory wFactory = new WALFactory(conf, null, "1234"); + HRegion meta = HRegion.createHRegion(HRegionInfo.FIRST_META_REGIONINFO, testDir, + conf, TableDescriptor.metaTableDescriptor(conf), + wFactory.getMetaWAL(HRegionInfo.FIRST_META_REGIONINFO. + getEncodedNameAsBytes())); + HRegionInfo hri = new HRegionInfo(TableName.valueOf("testShouldFlushMeta"), + Bytes.toBytes("row_0200"), Bytes.toBytes("row_0300")); + HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("testShouldFlushMeta")); + desc.addFamily(new HColumnDescriptor("foo".getBytes())); + HRegion r = + HRegion.createHRegion(hri, testDir, conf, desc, + wFactory.getWAL(hri.getEncodedNameAsBytes())); + HRegion.addRegionToMETA(meta, r); + edge.setCurrentTimeMillis(1234 + 100); + assertTrue(meta.shouldFlush() == false); + edge.setCurrentTimeMillis(edge.currentTime() + HRegion.META_CACHE_FLUSH_INTERVAL + 1); + assertTrue(meta.shouldFlush() == true); + } + private class EnvironmentEdgeForMemstoreTest implements EnvironmentEdge { long t = 1234; @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java index 0319051238e..2da1f7e7b7c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileRefresherChore.java @@ -145,7 +145,7 @@ public class TestStoreFileRefresherChore { boolean isStale = false; public StaleStorefileRefresherChore(int period, HRegionServer regionServer, Stoppable stoppable) { - super(period, regionServer, stoppable); + super(period, false, regionServer, stoppable); } @Override protected boolean isRegionStale(String encodedName, long time) {