HBASE-13895 DATALOSS: Region assigned before WAL replay when abort (Enis Soztutar) -- ADDENDUM

This commit is contained in:
stack 2015-07-01 21:38:05 -07:00
parent 5e9b6b8c3d
commit 09846ff81a
3 changed files with 38 additions and 16 deletions

View File

@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
/**
 * Exception raised by a region server that is in the process of aborting.
 *
 * <p>This type is a subclass of {@link RegionServerStoppedException}, so any handler that
 * catches the parent type also receives this one; code that needs to tell the two apart must
 * test for this class explicitly.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
@SuppressWarnings("serial")
public class RegionServerAbortedException extends RegionServerStoppedException {

  /**
   * Creates the exception with the supplied text.
   *
   * @param s text handed unchanged to the {@link RegionServerStoppedException} constructor
   */
  public RegionServerAbortedException(String s) {
    super(s);
  }
}

View File

@ -1869,14 +1869,14 @@ public class AssignmentManager extends ZooKeeperListener {
t = ((RemoteException)t).unwrapRemoteException();
}
boolean logRetries = true;
if (t instanceof RegionServerAbortedException) {
// RS is aborting, we cannot offline the region since the region may need to do WAL
// recovery. Until we see the RS expiration, we should retry.
if (t instanceof RegionServerAbortedException
|| t instanceof RegionServerStoppedException) {
// RS is aborting or stopping, we cannot offline the region since the region may need
// to do WAL recovery. Until we see the RS expiration, we should retry.
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
} else if (t instanceof NotServingRegionException
|| t instanceof RegionServerStoppedException
|| t instanceof ServerNotRunningYetException) {
LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t);

View File

@ -1028,12 +1028,6 @@ public class TestAssignmentManagerOnCluster {
assertTrue(regionStates.isRegionOnline(hri));
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH
am.unassign(hri, false);
// The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline());
// Kill the hosting server, which doesn't have meta on it.
cluster.killRegionServer(oldServerName);
cluster.waitForRegionServerToStop(oldServerName, -1);
@ -1159,12 +1153,6 @@ public class TestAssignmentManagerOnCluster {
assertTrue(regionStates.isRegionOnline(hri));
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
// Try to unassign the dead region before SSH
am.unassign(hri, false);
// The region should be moved to offline since the server is dead
RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline());
// Disable the table now.
master.disableTable(hri.getTable());