diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java index 44bb89f3c94..967d73a1c9f 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/VersionInfo.java @@ -33,6 +33,10 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; public class VersionInfo { private static final Log LOG = LogFactory.getLog(VersionInfo.class.getName()); + // If between two dots there is not a number, we regard it as a very large number so it is + // higher than any numbers in the version. + private static int VERY_LARGE_NUMBER = 100000; + /** * Get the hbase version. * @return the hbase version string, eg. "0.6.3-dev" @@ -108,6 +112,45 @@ public class VersionInfo { } } + public static int compareVersion(String v1, String v2) { + //fast compare equals first + if (v1.equals(v2)) { + return 0; + } + + String s1[] = v1.split("\\.|-");//1.2.3-hotfix -> [1, 2, 3, hotfix] + String s2[] = v2.split("\\.|-"); + int index = 0; + while (index < s1.length && index < s2.length) { + int va = VERY_LARGE_NUMBER, vb = VERY_LARGE_NUMBER; + try { + va = Integer.parseInt(s1[index]); + } catch (Exception ingore) { + } + try { + vb = Integer.parseInt(s2[index]); + } catch (Exception ingore) { + } + if (va != vb) { + return va - vb; + } + if (va == VERY_LARGE_NUMBER) { + // compare as String + int c = s1[index].compareTo(s2[index]); + if (c != 0) { + return c; + } + } + index++; + } + if (index < s1.length) { + // s1 is longer + return 1; + } + //s2 is longer + return -1; + } + public static void main(String[] args) { writeTo(System.out); } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestVersionInfo.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestVersionInfo.java new file mode 100644 index 00000000000..896c5241ecf --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestVersionInfo.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import org.apache.hadoop.hbase.testclassification.SmallTests; +import static org.junit.Assert.assertTrue; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestVersionInfo { + + @Test + public void testCompareVersion() { + assertTrue(VersionInfo.compareVersion("1.0.0", "0.98.11") > 0); + assertTrue(VersionInfo.compareVersion("0.98.11", "1.0.1") < 0); + assertTrue(VersionInfo.compareVersion("2.0.0", "1.4.0") > 0); + assertTrue(VersionInfo.compareVersion("2.0.0", "2.0.0-SNAPSHOT") < 0); + } +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index aa0d80368f1..67fda75815c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -886,6 +886,8 @@ public class HMaster extends HRegionServer implements MasterServices { // Set master as 'initialized'. setInitialized(true); + assignmentManager.checkIfShouldMoveSystemRegionAsync(); + status.setStatus("Assign meta replicas"); metaBootstrap.assignMetaReplicas(); @@ -1656,8 +1658,7 @@ public class HMaster extends HRegionServer implements MasterServices { // Replace with an async implementation from which you can get // a success/failure result. @VisibleForTesting - public void move(final byte[] encodedRegionName, - final byte[] destServerName) throws HBaseIOException { + public void move(final byte[] encodedRegionName, byte[] destServerName) throws HBaseIOException { RegionState regionState = assignmentManager.getRegionStates(). getRegionState(Bytes.toString(encodedRegionName)); @@ -1669,11 +1670,19 @@ public class HMaster extends HRegionServer implements MasterServices { } ServerName dest; + List exclude = hri.isSystemTable() ? assignmentManager.getExcludedServersForSystemTable() + : new ArrayList<>(1); + if (destServerName != null && exclude.contains(ServerName.valueOf(Bytes.toString(destServerName)))) { + LOG.info( + Bytes.toString(encodedRegionName) + " can not move to " + Bytes.toString(destServerName) + + " because the server is in exclude list"); + destServerName = null; + } if (destServerName == null || destServerName.length == 0) { LOG.info("Passed destination servername is null/empty so " + "choosing a server at random"); - final List destServers = this.serverManager.createDestinationServersList( - regionState.getServerName()); + exclude.add(regionState.getServerName()); + final List destServers = this.serverManager.createDestinationServersList(exclude); dest = balancer.randomAssignment(hri, destServers); if (dest == null) { LOG.debug("Unable to determine a plan to assign " + hri); @@ -2584,7 +2593,12 @@ public class HMaster extends HRegionServer implements MasterServices { if (info != null && info.hasVersionInfo()) { return info.getVersionInfo().getVersion(); } - return "Unknown"; + return "0.0.0"; //Lowest version to prevent move system region to unknown version RS. + } + + @Override + public void checkIfShouldMoveSystemRegionAsync() { + assignmentManager.checkIfShouldMoveSystemRegionAsync(); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 4d7d561c09f..c515435dd6d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -513,4 +513,8 @@ public interface MasterServices extends Server { * @return {@link LockManager} to lock namespaces/tables/regions. */ LockManager getLockManager(); + + public String getRegionServerVersion(final ServerName sn); + + public void checkIfShouldMoveSystemRegionAsync(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 6ae9f0fabce..4dff6f42193 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -1206,13 +1206,13 @@ public class ServerManager { /** * Creates a list of possible destinations for a region. It contains the online servers, but not * the draining or dying servers. - * @param serverToExclude can be null if there is no server to exclude + * @param serversToExclude can be null if there is no server to exclude */ - public List createDestinationServersList(final ServerName serverToExclude){ + public List createDestinationServersList(final List serversToExclude){ final List destServers = getOnlineServersList(); - if (serverToExclude != null){ - destServers.remove(serverToExclude); + if (serversToExclude != null){ + destServers.removeAll(serversToExclude); } // Loop through the draining server list and remove them from the server list diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index a7fb743e205..046612af9ab 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -86,8 +87,10 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.util.VersionInfo; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /** * The AssignmentManager is the coordinator for region assign/unassign operations. @@ -170,6 +173,8 @@ public class AssignmentManager implements ServerListener { private final int assignDispatchWaitMillis; private final int assignMaxAttempts; + private final Object checkIfShouldMoveSystemRegionLock = new Object(); + private Thread assignThread; public AssignmentManager(final MasterServices master) { @@ -452,6 +457,55 @@ public class AssignmentManager implements ServerListener { ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc); } + /** + * Start a new thread to check if there are region servers whose versions are higher than others. + * If so, move all system table regions to RS with the highest version to keep compatibility. + * The reason is, RS in new version may not be able to access RS in old version when there are + * some incompatible changes. + */ + public void checkIfShouldMoveSystemRegionAsync() { + new Thread(() -> { + try { + synchronized (checkIfShouldMoveSystemRegionLock) { + List serverList = master.getServerManager() + .createDestinationServersList(getExcludedServersForSystemTable()); + List plans = new ArrayList<>(); + for (ServerName server : getExcludedServersForSystemTable()) { + List regionsShouldMove = getCarryingSystemTables(server); + if (!regionsShouldMove.isEmpty()) { + for (HRegionInfo regionInfo : regionsShouldMove) { + RegionPlan plan = new RegionPlan(regionInfo, server, + getBalancer().randomAssignment(regionInfo, serverList)); + if (regionInfo.isMetaRegion()) { + // Must move meta region first. + moveAsync(plan); + } else { + plans.add(plan); + } + } + } + for (RegionPlan plan : plans) { + moveAsync(plan); + } + } + } + } catch (Throwable t) { + LOG.error(t); + } + }).start(); + } + + private List getCarryingSystemTables(ServerName serverName) { + Set regions = this.getRegionStates().getServerNode(serverName).getRegions(); + if (regions == null) { + return new ArrayList<>(); + } + return regions.stream() + .map(RegionStateNode::getRegionInfo) + .filter(HRegionInfo::isSystemTable) + .collect(Collectors.toList()); + } + public void assign(final HRegionInfo regionInfo) throws IOException { assign(regionInfo, true); } @@ -617,6 +671,19 @@ public class AssignmentManager implements ServerListener { } public MoveRegionProcedure createMoveRegionProcedure(final RegionPlan plan) { + if (plan.getRegionInfo().isSystemTable()) { + List exclude = getExcludedServersForSystemTable(); + if (plan.getDestination() != null && exclude.contains(plan.getDestination())) { + try { + LOG.info("Can not move " + plan.getRegionInfo() + " to " + plan.getDestination() + + " because the server is not with highest version"); + plan.setDestination(getBalancer().randomAssignment(plan.getRegionInfo(), + this.master.getServerManager().createDestinationServersList(exclude))); + } catch (HBaseIOException e) { + LOG.warn(e); + } + } + } return new MoveRegionProcedure(getProcedureEnvironment(), plan); } @@ -1686,6 +1753,26 @@ public class AssignmentManager implements ServerListener { } } + /** + * Get a list of servers that this region can not assign to. + * For system table, we must assign them to a server with highest version. + */ + public List getExcludedServersForSystemTable() { + List> serverList = master.getServerManager().getOnlineServersList() + .stream() + .map((s)->new Pair<>(s, master.getRegionServerVersion(s))) + .collect(Collectors.toList()); + if (serverList.isEmpty()) { + return new ArrayList<>(); + } + String highestVersion = Collections.max(serverList, + (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), o2.getSecond())).getSecond(); + return serverList.stream() + .filter((p)->!p.getSecond().equals(highestVersion)) + .map(Pair::getFirst) + .collect(Collectors.toList()); + } + // ============================================================================================ // Server Helpers // ============================================================================================ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java index a6a5c179326..078640ecd31 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java @@ -27,9 +27,9 @@ import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionServerInfo; @@ -50,10 +50,10 @@ public class RegionServerTracker extends ZooKeeperListener { private static final Log LOG = LogFactory.getLog(RegionServerTracker.class); private NavigableMap regionServers = new TreeMap<>(); private ServerManager serverManager; - private Server server; + private MasterServices server; public RegionServerTracker(ZooKeeperWatcher watcher, - Server server, ServerManager serverManager) { + MasterServices server, ServerManager serverManager) { super(watcher); this.server = server; this.serverManager = serverManager; @@ -71,10 +71,10 @@ public class RegionServerTracker extends ZooKeeperListener { watcher.registerListener(this); List servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.znodePaths.rsZNode); - add(servers); + refresh(servers); } - private void add(final List servers) throws IOException { + private void refresh(final List servers) throws IOException { synchronized(this.regionServers) { this.regionServers.clear(); for (String n: servers) { @@ -102,6 +102,9 @@ public class RegionServerTracker extends ZooKeeperListener { } } } + if (server.isInitialized()) { + server.checkIfShouldMoveSystemRegionAsync(); + } } private void remove(final ServerName sn) { @@ -134,7 +137,7 @@ public class RegionServerTracker extends ZooKeeperListener { try { List servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.znodePaths.rsZNode); - add(servers); + refresh(servers); } catch (IOException e) { server.abort("Unexpected zk exception getting RS nodes", e); } catch (KeeperException e) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index 1708bbe94d1..8f878b85b44 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -442,6 +442,15 @@ public class MockNoopMasterServices implements MasterServices, Server { return null; } + @Override + public String getRegionServerVersion(ServerName sn) { + return null; + } + + @Override + public void checkIfShouldMoveSystemRegionAsync() { + } + @Override public long dispatchMergingRegions(HRegionInfo region_a, HRegionInfo region_b, boolean forcible, long nonceGroup, long nonce) throws IOException {