HBASE-26842 TestSnapshotProcedure fails in branch-2 (#4225)

Closes #4225

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
huiruan 2022-03-18 16:20:20 +08:00 committed by Duo Zhang
parent 81398f9064
commit 4a6ba116bf
12 changed files with 581 additions and 600 deletions

View File

@ -2272,8 +2272,10 @@ public interface Admin extends Abortable, Closeable {
* @throws SnapshotCreationException if snapshot failed to be taken
* @throws IllegalArgumentException if the snapshot request is formatted incorrectly
*/
void snapshot(SnapshotDescription snapshot)
throws IOException, SnapshotCreationException, IllegalArgumentException;
default void snapshot(SnapshotDescription snapshot)
throws IOException, SnapshotCreationException, IllegalArgumentException {
get(snapshotAsync(snapshot), getSyncWaitTimeout(), TimeUnit.MILLISECONDS);
}
/**
* Take a snapshot without waiting for the server to complete that snapshot (asynchronous) Only a

View File

@ -2513,86 +2513,50 @@ public class HBaseAdmin implements Admin {
}
@Override
public void snapshot(SnapshotDescription snapshotDesc)
throws IOException, SnapshotCreationException, IllegalArgumentException {
// actually take the snapshot
SnapshotProtos.SnapshotDescription snapshot =
ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotDesc);
SnapshotResponse response = asyncSnapshot(snapshot);
final IsSnapshotDoneRequest request =
IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot).build();
IsSnapshotDoneResponse done = null;
long start = EnvironmentEdgeManager.currentTime();
long max = response.getExpectedTimeout();
long maxPauseTime = max / this.numRetries;
int tries = 0;
LOG.debug("Waiting a max of " + max + " ms for snapshot '" +
ClientSnapshotDescriptionUtils.toString(snapshot) + "'' to complete. (max " +
maxPauseTime + " ms per retry)");
while (tries == 0
|| ((EnvironmentEdgeManager.currentTime() - start) < max && !done.getDone())) {
try {
// sleep a backoff <= pauseTime amount
long sleep = getPauseTime(tries++);
sleep = sleep > maxPauseTime ? maxPauseTime : sleep;
LOG.debug("(#" + tries + ") Sleeping: " + sleep +
"ms while waiting for snapshot completion.");
Thread.sleep(sleep);
} catch (InterruptedException e) {
throw (InterruptedIOException)new InterruptedIOException("Interrupted").initCause(e);
}
LOG.debug("Getting current status of snapshot from master...");
done = executeCallable(new MasterCallable<IsSnapshotDoneResponse>(getConnection(),
getRpcControllerFactory()) {
@Override
protected IsSnapshotDoneResponse rpcCall() throws Exception {
return master.isSnapshotDone(getRpcController(), request);
}
});
}
if (!done.getDone()) {
throw new SnapshotCreationException("Snapshot '" + snapshot.getName()
+ "' wasn't completed in expectedTime:" + max + " ms", snapshotDesc);
public Future<Void> snapshotAsync(SnapshotDescription snapshotDesc)
throws IOException, SnapshotCreationException {
SnapshotResponse resp = asyncSnapshot(ProtobufUtil
.createHBaseProtosSnapshotDesc(snapshotDesc));
// This is for keeping compatibility with old implementation.
// If there is a procId field in the response, then the snapshot will be operated with a
// SnapshotProcedure, otherwise the snapshot will be coordinated by zk.
if (resp.hasProcId()) {
return new SnapshotFuture(this, snapshotDesc, resp.getProcId());
} else {
return new SnapshotFuture(this, snapshotDesc, null);
}
}
@Override
public Future<Void> snapshotAsync(SnapshotDescription snapshotDesc)
throws IOException, SnapshotCreationException {
asyncSnapshot(ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotDesc));
return new ProcedureFuture<Void>(this, null) {
private static final class SnapshotFuture extends TableFuture<Void> {
private final SnapshotDescription snapshotDesc;
@Override
protected Void waitOperationResult(long deadlineTs) throws IOException, TimeoutException {
waitForState(deadlineTs, new WaitForStateCallable() {
public SnapshotFuture(HBaseAdmin admin, SnapshotDescription snapshotDesc, Long procId) {
super(admin, snapshotDesc.getTableName(), procId);
this.snapshotDesc = snapshotDesc;
}
@Override
public void throwInterruptedException() throws InterruptedIOException {
throw new InterruptedIOException(
"Interrupted while waiting for taking snapshot" + snapshotDesc);
}
@Override
public String getOperationType() {
return "SNAPSHOT";
}
@Override
public void throwTimeoutException(long elapsedTime) throws TimeoutException {
throw new TimeoutException("Snapshot '" + snapshotDesc.getName() +
"' wasn't completed in expectedTime:" + elapsedTime + " ms");
}
@Override
public boolean checkState(int tries) throws IOException {
return isSnapshotFinished(snapshotDesc);
}
});
return null;
}
};
@Override
protected Void waitOperationResult(long deadlineTs) throws IOException, TimeoutException {
waitForState(deadlineTs, new TableWaitForStateCallable() {
@Override
public boolean checkState(int tries) throws IOException {
return getAdmin().isSnapshotFinished(snapshotDesc);
}
});
return null;
}
}
private SnapshotResponse asyncSnapshot(SnapshotProtos.SnapshotDescription snapshot)
throws IOException {
ClientSnapshotDescriptionUtils.assertSnapshotRequestIsValid(snapshot);
final SnapshotRequest request = SnapshotRequest.newBuilder().setSnapshot(snapshot)
.build();
.setNonceGroup(ng.newNonce()).setNonce(ng.newNonce()).build();
// run the snapshot on the master
return executeCallable(new MasterCallable<SnapshotResponse>(getConnection(),
getRpcControllerFactory()) {

View File

@ -1,199 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ipc.HBaseRpcController;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.SnapshotResponse;
/**
* Test snapshot logic from the client
*/
@Category({SmallTests.class, ClientTests.class})
public class TestSnapshotFromAdmin {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotFromAdmin.class);
private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotFromAdmin.class);
@Rule
public TestName name = new TestName();
/**
* Test that the logic for doing 'correct' back-off based on exponential increase and the max-time
* passed from the server ensures the correct overall waiting for the snapshot to finish.
* @throws Exception
*/
@Test
public void testBackoffLogic() throws Exception {
final int pauseTime = 100;
final int maxWaitTime =
HConstants.RETRY_BACKOFF[HConstants.RETRY_BACKOFF.length - 1] * pauseTime;
final int numRetries = HConstants.RETRY_BACKOFF.length;
// calculate the wait time, if we just do straight backoff (ignoring the expected time from
// master)
long ignoreExpectedTime = 0;
for (int i = 0; i < HConstants.RETRY_BACKOFF.length; i++) {
ignoreExpectedTime += HConstants.RETRY_BACKOFF[i] * pauseTime;
}
// the correct wait time, capping at the maxTime/tries + fudge room
final long time = pauseTime * 3L + ((maxWaitTime / numRetries) * 3) + 300L;
assertTrue("Capped snapshot wait time isn't less that the uncapped backoff time "
+ "- further testing won't prove anything.", time < ignoreExpectedTime);
// setup the mocks
ConnectionImplementation mockConnection = Mockito
.mock(ConnectionImplementation.class);
Configuration conf = HBaseConfiguration.create();
// setup the conf to match the expected properties
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, numRetries);
conf.setLong("hbase.client.pause", pauseTime);
// mock the master admin to our mock
MasterKeepAliveConnection mockMaster = Mockito.mock(MasterKeepAliveConnection.class);
Mockito.when(mockConnection.getConfiguration()).thenReturn(conf);
Mockito.when(mockConnection.getMaster()).thenReturn(mockMaster);
// we need a real retrying caller
RpcRetryingCallerFactory callerFactory = new RpcRetryingCallerFactory(conf);
RpcControllerFactory controllerFactory = Mockito.mock(RpcControllerFactory.class);
Mockito.when(controllerFactory.newController()).thenReturn(
Mockito.mock(HBaseRpcController.class));
Mockito.when(mockConnection.getRpcRetryingCallerFactory()).thenReturn(callerFactory);
Mockito.when(mockConnection.getRpcControllerFactory()).thenReturn(controllerFactory);
// set the max wait time for the snapshot to complete
SnapshotResponse response = SnapshotResponse.newBuilder()
.setExpectedTimeout(maxWaitTime)
.build();
Mockito
.when(
mockMaster.snapshot((RpcController) Mockito.any(),
Mockito.any())).thenReturn(response);
// setup the response
IsSnapshotDoneResponse.Builder builder = IsSnapshotDoneResponse.newBuilder();
builder.setDone(false);
// first five times, we return false, last we get success
Mockito.when(
mockMaster.isSnapshotDone((RpcController) Mockito.any(),
Mockito.any())).thenReturn(builder.build(), builder.build(),
builder.build(), builder.build(), builder.build(), builder.setDone(true).build());
// setup the admin and run the test
Admin admin = new HBaseAdmin(mockConnection);
String snapshot = "snapshot";
final TableName table = TableName.valueOf(name.getMethodName());
// get start time
long start = EnvironmentEdgeManager.currentTime();
admin.snapshot(snapshot, table);
long finish = EnvironmentEdgeManager.currentTime();
long elapsed = (finish - start);
assertTrue("Elapsed time:" + elapsed + " is more than expected max:" + time, elapsed <= time);
admin.close();
}
/**
* Make sure that we validate the snapshot name and the table name before we pass anything across
* the wire
* @throws Exception on failure
*/
@Test
public void testValidateSnapshotName() throws Exception {
ConnectionImplementation mockConnection = Mockito
.mock(ConnectionImplementation.class);
Configuration conf = HBaseConfiguration.create();
Mockito.when(mockConnection.getConfiguration()).thenReturn(conf);
// we need a real retrying caller
RpcRetryingCallerFactory callerFactory = new RpcRetryingCallerFactory(conf);
RpcControllerFactory controllerFactory = Mockito.mock(RpcControllerFactory.class);
Mockito.when(controllerFactory.newController()).thenReturn(
Mockito.mock(HBaseRpcController.class));
Mockito.when(mockConnection.getRpcRetryingCallerFactory()).thenReturn(callerFactory);
Mockito.when(mockConnection.getRpcControllerFactory()).thenReturn(controllerFactory);
Admin admin = new HBaseAdmin(mockConnection);
// check that invalid snapshot names fail
failSnapshotStart(admin, new SnapshotDescription(HConstants.SNAPSHOT_DIR_NAME));
failSnapshotStart(admin, new SnapshotDescription("-snapshot"));
failSnapshotStart(admin, new SnapshotDescription("snapshot fails"));
failSnapshotStart(admin, new SnapshotDescription("snap$hot"));
failSnapshotStart(admin, new SnapshotDescription("snap:hot"));
// check the table name also get verified
failSnapshotDescriptorCreation("snapshot", ".table");
failSnapshotDescriptorCreation("snapshot", "-table");
failSnapshotDescriptorCreation("snapshot", "table fails");
failSnapshotDescriptorCreation("snapshot", "tab%le");
// mock the master connection
MasterKeepAliveConnection master = Mockito.mock(MasterKeepAliveConnection.class);
Mockito.when(mockConnection.getMaster()).thenReturn(master);
SnapshotResponse response = SnapshotResponse.newBuilder().setExpectedTimeout(0).build();
Mockito.when(
master.snapshot((RpcController) Mockito.any(), Mockito.any()))
.thenReturn(response);
IsSnapshotDoneResponse doneResponse = IsSnapshotDoneResponse.newBuilder().setDone(true).build();
Mockito.when(
master.isSnapshotDone((RpcController) Mockito.any(),
Mockito.any())).thenReturn(doneResponse);
// make sure that we can use valid names
admin.snapshot(new SnapshotDescription("snapshot", TableName.valueOf(name.getMethodName())));
}
private void failSnapshotStart(Admin admin, SnapshotDescription snapshot)
throws IOException {
try {
admin.snapshot(snapshot);
fail("Snapshot should not have succeed with name:" + snapshot.getName());
} catch (IllegalArgumentException e) {
LOG.debug("Correctly failed to start snapshot:" + e.getMessage());
}
}
private void failSnapshotDescriptorCreation(final String snapshotName, final String tableName) {
try {
new SnapshotDescription(snapshotName, tableName);
fail("SnapshotDescription should not have succeed with name:" + snapshotName);
} catch (IllegalArgumentException e) {
LOG.debug("Correctly failed to create SnapshotDescription:" + e.getMessage());
}
}
}

View File

@ -18,50 +18,30 @@
package org.apache.hadoop.hbase.master.procedure;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.mockito.internal.stubbing.answers.AnswersWithDelay;
@ -69,27 +49,25 @@ import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedure {
private static final Logger LOG = LoggerFactory.getLogger(TestSnapshotProcedure.class);
protected static final Logger LOG = LoggerFactory.getLogger(TestSnapshotProcedure.class);
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedure.class);
private static HBaseTestingUtility TEST_UTIL;
private HMaster master;
private TableName TABLE_NAME;
private byte[] CF;
private String SNAPSHOT_NAME;
private SnapshotDescription snapshot;
private SnapshotProtos.SnapshotDescription snapshotProto;
protected static HBaseTestingUtility TEST_UTIL;
protected HMaster master;
protected TableName TABLE_NAME;
protected byte[] CF;
protected String SNAPSHOT_NAME;
protected SnapshotDescription snapshot;
protected SnapshotProtos.SnapshotDescription snapshotProto;
@Before
public void setup() throws Exception {
@ -117,96 +95,6 @@ public class TestSnapshotProcedure {
TEST_UTIL.loadTable(table, CF, false);
}
@Test
public void testSimpleSnapshotTable() throws Exception {
TEST_UTIL.getAdmin().snapshot(snapshot);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test
public void testMasterRestart() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
SnapshotProcedure spySp = getDelayedOnSpecificStateSnapshotProcedure(sp,
procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
long procId = procExec.submitProcedure(spySp);
TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures()
.stream().map(Procedure::getProcId).collect(Collectors.toList()).contains(procId));
TEST_UTIL.getHBaseCluster().killMaster(master.getServerName());
TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000);
TEST_UTIL.getHBaseCluster().startMaster();
TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
master = TEST_UTIL.getHBaseCluster().getMaster();
assertTrue(master.getSnapshotManager().isTakingAnySnapshot());
assertTrue(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME));
List<SnapshotProcedure> unfinishedProcedures = master
.getMasterProcedureExecutor().getProcedures().stream()
.filter(p -> p instanceof SnapshotProcedure)
.filter(p -> !p.isFinished()).map(p -> (SnapshotProcedure) p)
.collect(Collectors.toList());
assertEquals(unfinishedProcedures.size(), 1);
long newProcId = unfinishedProcedures.get(0).getProcId();
assertEquals(procId, newProcId);
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), newProcId);
assertFalse(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME));
List<SnapshotProtos.SnapshotDescription> snapshots
= master.getSnapshotManager().getCompletedSnapshots();
assertEquals(1, snapshots.size());
assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName());
assertEquals(TABLE_NAME, TableName.valueOf(snapshots.get(0).getTable()));
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test
public void testRegionServerCrashWhileTakingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
long procId = procExec.submitProcedure(sp);
SnapshotRegionProcedure snp = waitProcedureRunnableAndGetFirst(
SnapshotRegionProcedure.class, 60000);
ServerName targetServer = env.getAssignmentManager().getRegionStates()
.getRegionStateNode(snp.getRegion()).getRegionLocation();
TEST_UTIL.getHBaseCluster().killRegionServer(targetServer);
TEST_UTIL.waitFor(60000, () -> snp.inRetrying());
ProcedureTestingUtility.waitProcedure(procExec, procId);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test
public void testRegionServerCrashWhileVerifyingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
long procId = procExec.submitProcedure(sp);
SnapshotVerifyProcedure svp = waitProcedureRunnableAndGetFirst(
SnapshotVerifyProcedure.class, 60000);
TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null);
ServerName previousTargetServer = svp.getServerName();
HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(previousTargetServer);
TEST_UTIL.getHBaseCluster().killRegionServer(rs.getServerName());
TEST_UTIL.waitFor(60000, () -> svp.getServerName() != null
&& !svp.getServerName().equals(previousTargetServer));
ProcedureTestingUtility.waitProcedure(procExec, procId);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
public <T extends Procedure<MasterProcedureEnv>> T waitProcedureRunnableAndGetFirst(
Class<T> clazz, long timeout) throws IOException {
TEST_UTIL.waitFor(timeout, () -> master.getProcedures().stream()
@ -217,181 +105,7 @@ public class TestSnapshotProcedure {
return procOpt.get();
}
@Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class)
public void testClientTakingTwoSnapshotOnSameTable() throws Exception {
Thread first = new Thread("first-client") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("first client failed taking snapshot", e);
fail("first client failed taking snapshot");
}
}
};
first.start();
Thread.sleep(1000);
// we don't allow different snapshot with same name
SnapshotDescription snapshotWithSameName =
new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, SnapshotType.SKIPFLUSH);
TEST_UTIL.getAdmin().snapshot(snapshotWithSameName);
}
@Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class)
public void testClientTakeSameSnapshotTwice() throws IOException, InterruptedException {
Thread first = new Thread("first-client") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("first client failed taking snapshot", e);
fail("first client failed taking snapshot");
}
}
};
first.start();
Thread.sleep(1000);
TEST_UTIL.getAdmin().snapshot(snapshot);
}
@Test
public void testTakeZkCoordinatedSnapshotAndProcedureCoordinatedSnapshotBoth() throws Exception {
String newSnapshotName = SNAPSHOT_NAME + "_2";
Thread first = new Thread("procedure-snapshot") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("procedure snapshot failed", e);
fail("procedure snapshot failed");
}
}
};
first.start();
Thread.sleep(1000);
SnapshotManager sm = master.getSnapshotManager();
TEST_UTIL.waitFor(2000, 50, () -> !sm.isTakingSnapshot(TABLE_NAME)
&& sm.isTableTakingAnySnapshot(TABLE_NAME));
TEST_UTIL.getConfiguration().setBoolean("hbase.snapshot.zk.coordinated", true);
SnapshotDescription snapshotOnSameTable =
new SnapshotDescription(newSnapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH);
SnapshotProtos.SnapshotDescription snapshotOnSameTableProto = ProtobufUtil
.createHBaseProtosSnapshotDesc(snapshotOnSameTable);
Thread second = new Thread("zk-snapshot") {
@Override
public void run() {
try {
master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto);
} catch (IOException e) {
LOG.error("zk snapshot failed", e);
fail("zk snapshot failed");
}
}
};
second.start();
TEST_UTIL.waitFor(2000, () -> sm.isTakingSnapshot(TABLE_NAME));
TEST_UTIL.waitFor(60000, () -> sm.isSnapshotDone(snapshotOnSameTableProto)
&& !sm.isTakingAnySnapshot());
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, CF);
}
@Test
public void testRunningTwoSnapshotProcedureOnSameTable() throws Exception {
String newSnapshotName = SNAPSHOT_NAME + "_2";
SnapshotProtos.SnapshotDescription snapshotProto2 = SnapshotProtos.SnapshotDescription
.newBuilder(snapshotProto).setName(newSnapshotName).build();
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp1 = new SnapshotProcedure(env, snapshotProto);
SnapshotProcedure sp2 = new SnapshotProcedure(env, snapshotProto2);
SnapshotProcedure spySp1 = getDelayedOnSpecificStateSnapshotProcedure(sp1,
procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
SnapshotProcedure spySp2 = getDelayedOnSpecificStateSnapshotProcedure(sp2,
procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
long procId1 = procExec.submitProcedure(spySp1);
long procId2 = procExec.submitProcedure(spySp2);
TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures()
.stream().map(Procedure::getProcId).collect(Collectors.toList())
.containsAll(Arrays.asList(procId1, procId2)));
assertFalse(procExec.isFinished(procId1));
assertFalse(procExec.isFinished(procId2));
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId1);
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId2);
List<SnapshotProtos.SnapshotDescription> snapshots =
master.getSnapshotManager().getCompletedSnapshots();
assertEquals(2, snapshots.size());
snapshots.sort(Comparator.comparing(SnapshotProtos.SnapshotDescription::getName));
assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName());
assertEquals(newSnapshotName, snapshots.get(1).getName());
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto2, TABLE_NAME, CF);
}
@Test
public void testTableInMergeWhileTakingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
List<RegionInfo> regions = master.getAssignmentManager().getTableRegions(TABLE_NAME, true)
.stream().sorted(RegionInfo.COMPARATOR).collect(Collectors.toList());
MergeTableRegionsProcedure mergeProc = new MergeTableRegionsProcedure(
procExec.getEnvironment(), new RegionInfo[] {regions.get(0), regions.get(1)}, false);
long mergeProcId = procExec.submitProcedure(mergeProc);
// wait until merge region procedure running
TEST_UTIL.waitFor(10000, () ->
procExec.getProcedure(mergeProcId).getState() == ProcedureState.RUNNABLE);
SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto);
long snapshotProcId = procExec.submitProcedure(sp);
TEST_UTIL.waitFor(2000, 1000, () -> procExec.getProcedure(snapshotProcId) != null &&
procExec.getProcedure(snapshotProcId).getState() == ProcedureState.WAITING_TIMEOUT);
ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test
public void testSnapshotCorruptedAndRollback() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto);
procExec.submitProcedure(sp);
TEST_UTIL.waitFor(60000, 500, () -> sp.getCurrentStateId() >
SnapshotState.SNAPSHOT_CONSOLIDATE_SNAPSHOT_VALUE);
DistributedFileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem();
Optional<HRegion> region = TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME).stream()
.filter(r -> !r.getStoreFileList(new byte[][] { CF }).isEmpty())
.findFirst();
assertTrue(region.isPresent());
region.get().getStoreFileList(new byte[][] { CF }).forEach(s -> {
try {
// delete real data files to trigger the CorruptedSnapshotException
dfs.delete(new Path(s), true);
LOG.info("delete {} to make snapshot corrupt", s);
} catch (Exception e) {
LOG.warn("Failed delete {} to make snapshot corrupt", s, e);
}
}
);
TEST_UTIL.waitFor(60000, () -> sp.isFailed() && sp.isFinished());
Configuration conf = master.getConfiguration();
Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(
snapshotProto, CommonFSUtils.getRootDir(conf), conf);
assertFalse(dfs.exists(workingDir));
assertFalse(master.getSnapshotManager().isTakingSnapshot(TABLE_NAME));
assertFalse(master.getSnapshotManager().isTakingAnySnapshot());
}
private SnapshotProcedure getDelayedOnSpecificStateSnapshotProcedure(
protected SnapshotProcedure getDelayedOnSpecificStateSnapshotProcedure(
SnapshotProcedure sp, MasterProcedureEnv env, SnapshotState state)
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
SnapshotProcedure spySp = Mockito.spy(sp);

View File

@ -0,0 +1,86 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import static org.junit.Assert.fail;
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureBasicSnapshot extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureBasicSnapshot.class);
@Test
public void testSimpleSnapshotTable() throws Exception {
TEST_UTIL.getAdmin().snapshot(snapshot);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class)
public void testClientTakingTwoSnapshotOnSameTable() throws Exception {
Thread first = new Thread("first-client") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("first client failed taking snapshot", e);
fail("first client failed taking snapshot");
}
}
};
first.start();
Thread.sleep(1000);
// we don't allow different snapshot with same name
SnapshotDescription snapshotWithSameName =
new SnapshotDescription(SNAPSHOT_NAME, TABLE_NAME, SnapshotType.SKIPFLUSH);
TEST_UTIL.getAdmin().snapshot(snapshotWithSameName);
}
@Test(expected = org.apache.hadoop.hbase.snapshot.SnapshotCreationException.class)
public void testClientTakeSameSnapshotTwice() throws IOException, InterruptedException {
Thread first = new Thread("first-client") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("first client failed taking snapshot", e);
fail("first client failed taking snapshot");
}
}
};
first.start();
Thread.sleep(1000);
TEST_UTIL.getAdmin().snapshot(snapshot);
}
}

View File

@ -0,0 +1,138 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureConcurrently extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureConcurrently.class);
@Test
public void testRunningTwoSnapshotProcedureOnSameTable() throws Exception {
String newSnapshotName = SNAPSHOT_NAME + "_2";
SnapshotProtos.SnapshotDescription snapshotProto2 = SnapshotProtos.SnapshotDescription
.newBuilder(snapshotProto).setName(newSnapshotName).build();
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp1 = new SnapshotProcedure(env, snapshotProto);
SnapshotProcedure sp2 = new SnapshotProcedure(env, snapshotProto2);
SnapshotProcedure spySp1 = getDelayedOnSpecificStateSnapshotProcedure(sp1,
procExec.getEnvironment(), MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
SnapshotProcedure spySp2 = getDelayedOnSpecificStateSnapshotProcedure(sp2,
procExec.getEnvironment(), MasterProcedureProtos.SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
long procId1 = procExec.submitProcedure(spySp1);
long procId2 = procExec.submitProcedure(spySp2);
TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures()
.stream().map(Procedure::getProcId).collect(Collectors.toList())
.containsAll(Arrays.asList(procId1, procId2)));
assertFalse(procExec.isFinished(procId1));
assertFalse(procExec.isFinished(procId2));
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId1);
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), procId2);
List<SnapshotProtos.SnapshotDescription> snapshots =
master.getSnapshotManager().getCompletedSnapshots();
assertEquals(2, snapshots.size());
snapshots.sort(Comparator.comparing(SnapshotProtos.SnapshotDescription::getName));
assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName());
assertEquals(newSnapshotName, snapshots.get(1).getName());
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto2, TABLE_NAME, CF);
}
@Test
public void testTakeZkCoordinatedSnapshotAndProcedureCoordinatedSnapshotBoth() throws Exception {
String newSnapshotName = SNAPSHOT_NAME + "_2";
Thread first = new Thread("procedure-snapshot") {
@Override
public void run() {
try {
TEST_UTIL.getAdmin().snapshot(snapshot);
} catch (IOException e) {
LOG.error("procedure snapshot failed", e);
fail("procedure snapshot failed");
}
}
};
first.start();
Thread.sleep(1000);
SnapshotManager sm = master.getSnapshotManager();
TEST_UTIL.waitFor(2000, 50, () -> !sm.isTakingSnapshot(TABLE_NAME)
&& sm.isTableTakingAnySnapshot(TABLE_NAME));
TEST_UTIL.getConfiguration().setBoolean("hbase.snapshot.zk.coordinated", true);
SnapshotDescription snapshotOnSameTable =
new SnapshotDescription(newSnapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH);
SnapshotProtos.SnapshotDescription snapshotOnSameTableProto = ProtobufUtil
.createHBaseProtosSnapshotDesc(snapshotOnSameTable);
Thread second = new Thread("zk-snapshot") {
@Override
public void run() {
try {
master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto);
} catch (IOException e) {
LOG.error("zk snapshot failed", e);
fail("zk snapshot failed");
}
}
};
second.start();
TEST_UTIL.waitFor(2000, () -> sm.isTakingSnapshot(TABLE_NAME));
TEST_UTIL.waitFor(60000, () -> sm.isSnapshotDone(snapshotOnSameTableProto)
&& !sm.isTakingAnySnapshot());
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, CF);
}
}

View File

@ -0,0 +1,89 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureMasterRestarts extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureMasterRestarts.class);
@Test
public void testMasterRestarts() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
SnapshotProcedure spySp = getDelayedOnSpecificStateSnapshotProcedure(sp,
procExec.getEnvironment(), SnapshotState.SNAPSHOT_SNAPSHOT_ONLINE_REGIONS);
long procId = procExec.submitProcedure(spySp);
TEST_UTIL.waitFor(2000, () -> env.getMasterServices().getProcedures()
.stream().map(Procedure::getProcId).collect(Collectors.toList()).contains(procId));
TEST_UTIL.getHBaseCluster().killMaster(master.getServerName());
TEST_UTIL.getHBaseCluster().waitForMasterToStop(master.getServerName(), 30000);
TEST_UTIL.getHBaseCluster().startMaster();
TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster();
master = TEST_UTIL.getHBaseCluster().getMaster();
assertTrue(master.getSnapshotManager().isTakingAnySnapshot());
assertTrue(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME));
List<SnapshotProcedure> unfinishedProcedures = master
.getMasterProcedureExecutor().getProcedures().stream()
.filter(p -> p instanceof SnapshotProcedure)
.filter(p -> !p.isFinished()).map(p -> (SnapshotProcedure) p)
.collect(Collectors.toList());
assertEquals(unfinishedProcedures.size(), 1);
long newProcId = unfinishedProcedures.get(0).getProcId();
assertEquals(procId, newProcId);
ProcedureTestingUtility.waitProcedure(master.getMasterProcedureExecutor(), newProcId);
assertFalse(master.getSnapshotManager().isTableTakingAnySnapshot(TABLE_NAME));
List<SnapshotProtos.SnapshotDescription> snapshots
= master.getSnapshotManager().getCompletedSnapshots();
assertEquals(1, snapshots.size());
assertEquals(SNAPSHOT_NAME, snapshots.get(0).getName());
assertEquals(TABLE_NAME, TableName.valueOf(snapshots.get(0).getTable()));
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
}

View File

@ -0,0 +1,62 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureRIT extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureRIT.class);
@Test
public void testTableInMergeWhileTakingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
List<RegionInfo> regions = master.getAssignmentManager().getTableRegions(TABLE_NAME, true)
.stream().sorted(RegionInfo.COMPARATOR).collect(Collectors.toList());
MergeTableRegionsProcedure mergeProc = new MergeTableRegionsProcedure(
procExec.getEnvironment(), new RegionInfo[] {regions.get(0), regions.get(1)}, false);
long mergeProcId = procExec.submitProcedure(mergeProc);
// wait until merge region procedure running
TEST_UTIL.waitFor(10000, () ->
procExec.getProcedure(mergeProcId).getState() == ProcedureState.RUNNABLE);
SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto);
long snapshotProcId = procExec.submitProcedure(sp);
TEST_UTIL.waitFor(2000, 1000, () -> procExec.getProcedure(snapshotProcId) != null &&
procExec.getProcedure(snapshotProcId).getState() == ProcedureState.WAITING_TIMEOUT);
ProcedureTestingUtility.waitProcedure(procExec, snapshotProcId);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
}

View File

@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureRSCrashes extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureRSCrashes.class);
@Test
public void testRegionServerCrashWhileTakingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
long procId = procExec.submitProcedure(sp);
SnapshotRegionProcedure snp = waitProcedureRunnableAndGetFirst(
SnapshotRegionProcedure.class, 60000);
ServerName targetServer = env.getAssignmentManager().getRegionStates()
.getRegionStateNode(snp.getRegion()).getRegionLocation();
TEST_UTIL.getHBaseCluster().killRegionServer(targetServer);
TEST_UTIL.waitFor(60000, () -> snp.inRetrying());
ProcedureTestingUtility.waitProcedure(procExec, procId);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
@Test
public void testRegionServerCrashWhileVerifyingSnapshot() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
MasterProcedureEnv env = procExec.getEnvironment();
SnapshotProcedure sp = new SnapshotProcedure(env, snapshotProto);
long procId = procExec.submitProcedure(sp);
SnapshotVerifyProcedure svp = waitProcedureRunnableAndGetFirst(
SnapshotVerifyProcedure.class, 60000);
TEST_UTIL.waitFor(10000, () -> svp.getServerName() != null);
ServerName previousTargetServer = svp.getServerName();
HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(previousTargetServer);
TEST_UTIL.getHBaseCluster().killRegionServer(rs.getServerName());
TEST_UTIL.waitFor(60000, () -> svp.getServerName() != null
&& !svp.getServerName().equals(previousTargetServer));
ProcedureTestingUtility.waitProcedure(procExec, procId);
SnapshotTestingUtils.assertOneSnapshotThatMatches(TEST_UTIL.getAdmin(), snapshotProto);
SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotProto, TABLE_NAME, CF);
}
}

View File

@ -0,0 +1,78 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.procedure;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.Optional;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
@Category({ MasterTests.class, MediumTests.class })
public class TestSnapshotProcedureSnapshotCorrupted extends TestSnapshotProcedure {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestSnapshotProcedureSnapshotCorrupted.class);
@Test
public void testSnapshotCorruptedAndRollback() throws Exception {
ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
SnapshotProcedure sp = new SnapshotProcedure(procExec.getEnvironment(), snapshotProto);
procExec.submitProcedure(sp);
TEST_UTIL.waitFor(60000, 500, () -> sp.getCurrentStateId() >
MasterProcedureProtos.SnapshotState.SNAPSHOT_CONSOLIDATE_SNAPSHOT_VALUE);
DistributedFileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem();
Optional<HRegion> region = TEST_UTIL.getHBaseCluster().getRegions(TABLE_NAME).stream()
.filter(r -> !r.getStoreFileList(new byte[][] { CF }).isEmpty())
.findFirst();
assertTrue(region.isPresent());
region.get().getStoreFileList(new byte[][] { CF }).forEach(s -> {
try {
// delete real data files to trigger the CorruptedSnapshotException
dfs.delete(new Path(s), true);
LOG.info("delete {} to make snapshot corrupt", s);
} catch (Exception e) {
LOG.warn("Failed delete {} to make snapshot corrupt", s, e);
}
}
);
TEST_UTIL.waitFor(60000, () -> sp.isFailed() && sp.isFinished());
Configuration conf = master.getConfiguration();
Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(
snapshotProto, CommonFSUtils.getRootDir(conf), conf);
assertFalse(dfs.exists(workingDir));
assertFalse(master.getSnapshotManager().isTakingSnapshot(TABLE_NAME));
assertFalse(master.getSnapshotManager().isTakingAnySnapshot());
}
}

View File

@ -21,7 +21,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import com.google.protobuf.ServiceException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@ -75,7 +74,6 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
@ -271,33 +269,6 @@ public final class SnapshotTestingUtils {
}
}
/**
* Helper method for testing async snapshot operations. Just waits for the
* given snapshot to complete on the server by repeatedly checking the master.
*
* @param master the master running the snapshot
* @param snapshot the snapshot to check
* @param sleep amount to sleep between checks to see if the snapshot is done
* @throws ServiceException if the snapshot fails
* @throws org.apache.hbase.thirdparty.com.google.protobuf.ServiceException
*/
public static void waitForSnapshotToComplete(HMaster master,
SnapshotProtos.SnapshotDescription snapshot, long sleep)
throws org.apache.hbase.thirdparty.com.google.protobuf.ServiceException {
final IsSnapshotDoneRequest request = IsSnapshotDoneRequest.newBuilder()
.setSnapshot(snapshot).build();
IsSnapshotDoneResponse done = IsSnapshotDoneResponse.newBuilder()
.buildPartial();
while (!done.getDone()) {
done = master.getMasterRpcServices().isSnapshotDone(null, request);
try {
Thread.sleep(sleep);
} catch (InterruptedException e) {
throw new org.apache.hbase.thirdparty.com.google.protobuf.ServiceException(e);
}
}
}
/*
* Take snapshot with maximum of numTries attempts, ignoring CorruptedSnapshotException
* except for the last CorruptedSnapshotException

View File

@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.testclassification.LargeTests;
@ -276,12 +275,8 @@ public class TestFlushSnapshotFromClient {
.setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();
// take the snapshot async
admin.takeSnapshotAsync(
new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));
// constantly loop, looking for the snapshot to complete
HMaster master = UTIL.getMiniHBaseCluster().getMaster();
SnapshotTestingUtils.waitForSnapshotToComplete(master, snapshot, 200);
admin.snapshotAsync(
new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH)).get();
LOG.info(" === Async Snapshot Completed ===");
UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);