From 02235f4e3275f929f087d4908e0aa33e1206a7db Mon Sep 17 00:00:00 2001 From: Josh Elser Date: Fri, 29 Jun 2018 11:09:33 +0800 Subject: [PATCH] HBASE-20792 info:servername and info:sn inconsistent for OPEN region Signed-off-by: zhangduo Signed-off-by: Michael Stack --- .../master/assignment/RegionStateStore.java | 11 +- .../hbase/master/assignment/RegionStates.java | 3 + .../assignment/TestRegionMoveAndAbandon.java | 129 ++++++++++++++++++ 3 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java index de9c4fd4683..aeef835dec7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java @@ -136,7 +136,7 @@ public class RegionStateStore { long openSeqNum = regionStateNode.getState() == State.OPEN ? regionStateNode.getOpenSeqNum() : HConstants.NO_SEQNUM; updateUserRegionLocation(regionStateNode.getRegionInfo(), regionStateNode.getState(), - regionStateNode.getRegionLocation(), regionStateNode.getLastHost(), openSeqNum, + regionStateNode.getRegionLocation(), openSeqNum, // The regionStateNode may have no procedure in a test scenario; allow for this. regionStateNode.getProcedure() != null ? 
regionStateNode.getProcedure().getProcId() : Procedure.NO_PROC_ID); @@ -153,10 +153,9 @@ public class RegionStateStore { } } - private void updateUserRegionLocation(final RegionInfo regionInfo, final State state, - final ServerName regionLocation, final ServerName lastHost, final long openSeqNum, - final long pid) - throws IOException { + private void updateUserRegionLocation(RegionInfo regionInfo, State state, + ServerName regionLocation, long openSeqNum, + long pid) throws IOException { long time = EnvironmentEdgeManager.currentTime(); final int replicaId = regionInfo.getReplicaId(); final Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(regionInfo), time); @@ -176,7 +175,7 @@ public class RegionStateStore { } info.append(", openSeqNum=").append(openSeqNum); info.append(", regionLocation=").append(regionLocation); - } else if (regionLocation != null && !regionLocation.equals(lastHost)) { + } else if (regionLocation != null) { // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients // currently; they want a server to hit. TODO: Make clients wait if no location. put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index 15a2fbc283e..3f98c418409 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -106,6 +106,9 @@ public class RegionStates { private volatile RegionTransitionProcedure procedure = null; private volatile ServerName regionLocation = null; + // Note that lastHost is only updated when a region is successfully CLOSED through + // UnassignProcedure, so do not use it for critical conditions as the data may be stale + // and out of sync with the data in meta. 
private volatile ServerName lastHost = null; /** * A Region-in-Transition (RIT) moves through states. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java new file mode 100644 index 00000000000..2e9c4172982 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionMoveAndAbandon.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.assignment; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; + +/** + * Testcase for HBASE-20792: info:servername and info:sn inconsistent for OPEN region. 
+ */ +@Category({ LargeTests.class, MasterTests.class }) +public class TestRegionMoveAndAbandon { + private static final Logger LOG = LoggerFactory.getLogger(TestRegionMoveAndAbandon.class); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionMoveAndAbandon.class); + + @Rule + public TestName name = new TestName(); + + private HBaseTestingUtility UTIL; + private MiniHBaseCluster cluster; + private MiniZooKeeperCluster zkCluster; + private HRegionServer rs1; + private HRegionServer rs2; + private RegionInfo regionInfo; + + @Before + public void setup() throws Exception { + UTIL = new HBaseTestingUtility(); + zkCluster = UTIL.startMiniZKCluster(); + cluster = UTIL.startMiniHBaseCluster(1, 2); + rs1 = cluster.getRegionServer(0); + rs2 = cluster.getRegionServer(1); + assertEquals(2, cluster.getRegionServerThreads().size()); + // Use hbase:namespace for the test; wait for it, then pick its single region + UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME, 30_000); + regionInfo = + Iterables.getOnlyElement(cluster.getRegions(TableName.NAMESPACE_TABLE_NAME)).getRegionInfo(); + } + + @After + public void teardown() throws Exception { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + if (zkCluster != null) { + zkCluster.shutdown(); + zkCluster = null; + } + } + + @Test + public void test() throws Exception { + LOG.info("Moving {} to {}", regionInfo, rs2.getServerName()); + // Move to RS2 + UTIL.moveRegionAndWait(regionInfo, rs2.getServerName()); + LOG.info("Moving {} to {}", regionInfo, rs1.getServerName()); + // Move to RS1 + UTIL.moveRegionAndWait(regionInfo, rs1.getServerName()); + LOG.info("Killing RS {}", rs1.getServerName()); + // Kill RS1, the server the region was just moved to + cluster.killRegionServer(rs1.getServerName()); + // Region should get reassigned to RS2; wait until the table is available again + UTIL.waitTableAvailable(TableName.NAMESPACE_TABLE_NAME, 30_000); + // Kill the master; a new one is started below + LOG.info("Killing master {}", cluster.getMaster().getServerName()); 
+ cluster.killMaster(cluster.getMaster().getServerName()); + // Kill RS2 as well + LOG.info("Killing RS {}", rs2.getServerName()); + cluster.killRegionServer(rs2.getServerName()); + // Start up everything again + LOG.info("Starting cluster"); + UTIL.getMiniHBaseCluster().startMaster(); + UTIL.ensureSomeRegionServersAvailable(2); + + UTIL.waitFor(30_000, new Waiter.Predicate<Exception>() { + @Override + public boolean evaluate() throws Exception { + try (Table nsTable = UTIL.getConnection().getTable(TableName.NAMESPACE_TABLE_NAME)) { + // Doesn't matter what we're getting. We just want to make sure we can access the region + nsTable.get(new Get(Bytes.toBytes("a"))); + return true; + } + } + }); + } +}