HBASE-13895 DATALOSS: Region assigned before WAL replay when abort (Enis Soztutar) -- ADDENDUM
This commit is contained in:
parent
5e9b6b8c3d
commit
09846ff81a
@ -0,0 +1,34 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase.regionserver;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown by the region server when it is aborting.
|
||||||
|
* <p>
|
||||||
|
* This type extends {@link RegionServerStoppedException}, so any handler that catches
|
||||||
|
* (or tests {@code instanceof}) the stopped exception also matches this one; code that
|
||||||
|
* needs to tell the two apart must check for this subclass first.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("serial") // this class declares no serialVersionUID
|
||||||
|
@InterfaceAudience.Public
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
public class RegionServerAbortedException extends RegionServerStoppedException {
|
||||||
|
/** @param s string handed unchanged to the superclass constructor */
public RegionServerAbortedException(String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
}
|
@ -1869,14 +1869,14 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||||||
t = ((RemoteException)t).unwrapRemoteException();
|
t = ((RemoteException)t).unwrapRemoteException();
|
||||||
}
|
}
|
||||||
boolean logRetries = true;
|
boolean logRetries = true;
|
||||||
if (t instanceof RegionServerAbortedException) {
|
if (t instanceof RegionServerAbortedException
|
||||||
// RS is aborting, we cannot offline the region since the region may need to do WAL
|
|| t instanceof RegionServerStoppedException) {
|
||||||
// recovery. Until we see the RS expiration, we should retry.
|
// RS is aborting or stopping, we cannot offline the region since the region may need
|
||||||
|
// to do WAL recovery. Until we see the RS expiration, we should retry.
|
||||||
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
|
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
|
||||||
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
|
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
|
||||||
|
|
||||||
} else if (t instanceof NotServingRegionException
|
} else if (t instanceof NotServingRegionException
|
||||||
|| t instanceof RegionServerStoppedException
|
|
||||||
|| t instanceof ServerNotRunningYetException) {
|
|| t instanceof ServerNotRunningYetException) {
|
||||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||||
+ ", it's not any more on " + server, t);
|
+ ", it's not any more on " + server, t);
|
||||||
|
@ -1028,12 +1028,6 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertTrue(regionStates.isRegionOnline(hri));
|
assertTrue(regionStates.isRegionOnline(hri));
|
||||||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||||
|
|
||||||
// Try to unassign the dead region before SSH
|
|
||||||
am.unassign(hri, false);
|
|
||||||
// The region should be moved to offline since the server is dead
|
|
||||||
RegionState state = regionStates.getRegionState(hri);
|
|
||||||
assertTrue(state.isOffline());
|
|
||||||
|
|
||||||
// Kill the hosting server, which doesn't have meta on it.
|
// Kill the hosting server, which doesn't have meta on it.
|
||||||
cluster.killRegionServer(oldServerName);
|
cluster.killRegionServer(oldServerName);
|
||||||
cluster.waitForRegionServerToStop(oldServerName, -1);
|
cluster.waitForRegionServerToStop(oldServerName, -1);
|
||||||
@ -1159,12 +1153,6 @@ public class TestAssignmentManagerOnCluster {
|
|||||||
assertTrue(regionStates.isRegionOnline(hri));
|
assertTrue(regionStates.isRegionOnline(hri));
|
||||||
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
|
||||||
|
|
||||||
// Try to unassign the dead region before SSH
|
|
||||||
am.unassign(hri, false);
|
|
||||||
// The region should be moved to offline since the server is dead
|
|
||||||
RegionState state = regionStates.getRegionState(hri);
|
|
||||||
assertTrue(state.isOffline());
|
|
||||||
|
|
||||||
// Disable the table now.
|
// Disable the table now.
|
||||||
master.disableTable(hri.getTable());
|
master.disableTable(hri.getTable());
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user