HBASE-21494 NPE when loading RecoverStandbyProcedure
This commit is contained in:
parent
f555258e7a
commit
b329e6e3f2
|
@ -778,7 +778,6 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
this.splitOrMergeTracker.start();
|
this.splitOrMergeTracker.start();
|
||||||
|
|
||||||
this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
|
this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
|
||||||
this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
|
|
||||||
|
|
||||||
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
|
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
|
||||||
this.drainingServerTracker.start();
|
this.drainingServerTracker.start();
|
||||||
|
@ -949,7 +948,10 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
}
|
}
|
||||||
|
|
||||||
status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
|
status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
|
||||||
|
// The below two managers must be created before loading procedures, as they will be used during
|
||||||
|
// loading.
|
||||||
this.serverManager = createServerManager(this);
|
this.serverManager = createServerManager(this);
|
||||||
|
this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
|
||||||
createProcedureExecutor();
|
createProcedureExecutor();
|
||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("rawtypes")
|
||||||
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType =
|
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType =
|
||||||
|
|
|
@ -146,12 +146,12 @@ public class SyncReplicationReplayWALManager {
|
||||||
this.fs = services.getMasterFileSystem().getWALFileSystem();
|
this.fs = services.getMasterFileSystem().getWALFileSystem();
|
||||||
this.walRootDir = services.getMasterFileSystem().getWALRootDir();
|
this.walRootDir = services.getMasterFileSystem().getWALRootDir();
|
||||||
this.remoteWALDir = new Path(this.walRootDir, ReplicationUtils.REMOTE_WAL_DIR_NAME);
|
this.remoteWALDir = new Path(this.walRootDir, ReplicationUtils.REMOTE_WAL_DIR_NAME);
|
||||||
MasterProcedureScheduler scheduler =
|
|
||||||
services.getMasterProcedureExecutor().getEnvironment().getProcedureScheduler();
|
|
||||||
serverManager.registerListener(new ServerListener() {
|
serverManager.registerListener(new ServerListener() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serverAdded(ServerName serverName) {
|
public void serverAdded(ServerName serverName) {
|
||||||
|
MasterProcedureScheduler scheduler =
|
||||||
|
services.getMasterProcedureExecutor().getEnvironment().getProcedureScheduler();
|
||||||
for (UsedReplayWorkersForPeer usedWorkers : usedWorkersByPeer.values()) {
|
for (UsedReplayWorkersForPeer usedWorkers : usedWorkersByPeer.values()) {
|
||||||
synchronized (usedWorkers) {
|
synchronized (usedWorkers) {
|
||||||
usedWorkers.wake(scheduler);
|
usedWorkers.wake(scheduler);
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master.replication;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
|
||||||
|
import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||||
|
import org.apache.hadoop.hbase.replication.SyncReplicationState;
|
||||||
|
import org.apache.hadoop.hbase.replication.SyncReplicationTestBase;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||||
|
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for HBASE-21494.
|
||||||
|
*/
|
||||||
|
@Category({ MasterTests.class, LargeTests.class })
|
||||||
|
public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBase {
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestRegisterPeerWorkerWhenRestarting.class);
|
||||||
|
|
||||||
|
private static volatile boolean FAIL = false;
|
||||||
|
|
||||||
|
public static final class HMasterForTest extends HMaster {
|
||||||
|
|
||||||
|
public HMasterForTest(Configuration conf) throws IOException, KeeperException {
|
||||||
|
super(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void remoteProcedureCompleted(long procId) {
|
||||||
|
if (FAIL && getMasterProcedureExecutor()
|
||||||
|
.getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure) {
|
||||||
|
throw new RuntimeException("Inject error");
|
||||||
|
}
|
||||||
|
super.remoteProcedureCompleted(procId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
UTIL2.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
|
||||||
|
SyncReplicationTestBase.setUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRestart() throws Exception {
|
||||||
|
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
||||||
|
SyncReplicationState.STANDBY);
|
||||||
|
UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
||||||
|
SyncReplicationState.ACTIVE);
|
||||||
|
|
||||||
|
UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
|
||||||
|
write(UTIL1, 0, 100);
|
||||||
|
Thread.sleep(2000);
|
||||||
|
// peer is disabled so no data have been replicated
|
||||||
|
verifyNotReplicatedThroughRegion(UTIL2, 0, 100);
|
||||||
|
|
||||||
|
// transit the A to DA first to avoid too many error logs.
|
||||||
|
UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
||||||
|
SyncReplicationState.DOWNGRADE_ACTIVE);
|
||||||
|
HMaster master = UTIL2.getHBaseCluster().getMaster();
|
||||||
|
// make sure the transiting can not succeed
|
||||||
|
FAIL = true;
|
||||||
|
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
|
||||||
|
Thread t = new Thread() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
||||||
|
SyncReplicationState.DOWNGRADE_ACTIVE);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new UncheckedIOException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
t.start();
|
||||||
|
// wait until we are in the states where we need to register peer worker when restarting
|
||||||
|
UTIL2.waitFor(60000,
|
||||||
|
() -> procExec.getProcedures().stream().filter(p -> p instanceof RecoverStandbyProcedure)
|
||||||
|
.map(p -> (RecoverStandbyProcedure) p)
|
||||||
|
.anyMatch(p -> p.getCurrentStateId() == DISPATCH_WALS_VALUE ||
|
||||||
|
p.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE));
|
||||||
|
// failover to another master
|
||||||
|
MasterThread mt = UTIL2.getMiniHBaseCluster().getMasterThread();
|
||||||
|
mt.getMaster().abort("for testing");
|
||||||
|
mt.join();
|
||||||
|
FAIL = false;
|
||||||
|
t.join();
|
||||||
|
// make sure the new master can finish the transiting
|
||||||
|
assertEquals(SyncReplicationState.DOWNGRADE_ACTIVE,
|
||||||
|
UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID));
|
||||||
|
verify(UTIL2, 0, 100);
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.master.replication;
|
package org.apache.hadoop.hbase.master.replication;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UncheckedIOException;
|
import java.io.UncheckedIOException;
|
||||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
@ -90,5 +92,8 @@ public class TestTransitPeerSyncReplicationStateProcedureRetry extends SyncRepli
|
||||||
.mapToLong(Procedure::getProcId).min().getAsLong();
|
.mapToLong(Procedure::getProcId).min().getAsLong();
|
||||||
MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
|
MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
|
||||||
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
|
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
|
||||||
|
assertEquals(SyncReplicationState.DOWNGRADE_ACTIVE,
|
||||||
|
UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID));
|
||||||
|
verify(UTIL2, 0, 100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue