HBASE-20878 Data loss if merging regions while ServerCrashProcedure executing
This commit is contained in:
parent
af2742fcf2
commit
44bf7076b7
|
@ -269,6 +269,7 @@ enum MergeTableRegionsState {
|
|||
MERGE_TABLE_REGIONS_POST_MERGE_COMMIT_OPERATION = 9;
|
||||
MERGE_TABLE_REGIONS_OPEN_MERGED_REGION = 10;
|
||||
MERGE_TABLE_REGIONS_POST_OPERATION = 11;
|
||||
MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS = 12;
|
||||
}
|
||||
|
||||
message MergeTableRegionsStateData {
|
||||
|
|
|
@ -219,7 +219,7 @@ public class MergeTableRegionsProcedure
|
|||
|
||||
@Override
|
||||
protected Flow executeFromState(final MasterProcedureEnv env,
|
||||
final MergeTableRegionsState state) {
|
||||
MergeTableRegionsState state) {
|
||||
LOG.trace("{} execute state={}", this, state);
|
||||
try {
|
||||
switch (state) {
|
||||
|
@ -236,6 +236,10 @@ public class MergeTableRegionsProcedure
|
|||
break;
|
||||
case MERGE_TABLE_REGIONS_CLOSE_REGIONS:
|
||||
addChildProcedure(createUnassignProcedures(env, getRegionReplication(env)));
|
||||
setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
|
||||
break;
|
||||
case MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
|
||||
checkClosedRegions(env);
|
||||
setNextState(MergeTableRegionsState.MERGE_TABLE_REGIONS_CREATE_MERGED_REGION);
|
||||
break;
|
||||
case MERGE_TABLE_REGIONS_CREATE_MERGED_REGION:
|
||||
|
@ -314,6 +318,8 @@ public class MergeTableRegionsProcedure
|
|||
case MERGE_TABLE_REGIONS_WRITE_MAX_SEQUENCE_ID_FILE:
|
||||
cleanupMergedRegion(env);
|
||||
break;
|
||||
case MERGE_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
|
||||
break;
|
||||
case MERGE_TABLE_REGIONS_CLOSE_REGIONS:
|
||||
rollbackCloseRegionsForMerge(env);
|
||||
break;
|
||||
|
@ -461,6 +467,33 @@ public class MergeTableRegionsProcedure
|
|||
return env.getAssignmentManager().getAssignmentManagerMetrics().getMergeProcMetrics();
|
||||
}
|
||||
|
||||
/**
|
||||
* check the closed regions
|
||||
* @param env the master env
|
||||
* @throws IOException IOException
|
||||
*/
|
||||
private void checkClosedRegions(final MasterProcedureEnv env) throws IOException {
|
||||
checkClosedRegion(env, regionsToMerge[0]);
|
||||
checkClosedRegion(env, regionsToMerge[1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether there is recovered.edits in the closed region
|
||||
* If any, that means this region is not closed property, we need
|
||||
* to abort region merge to prevent data loss
|
||||
* @param env master env
|
||||
* @param regionInfo regioninfo
|
||||
* @throws IOException IOException
|
||||
*/
|
||||
private void checkClosedRegion(final MasterProcedureEnv env,
|
||||
RegionInfo regionInfo) throws IOException {
|
||||
if (WALSplitter.hasRecoveredEdits(env.getMasterServices().getFileSystem(),
|
||||
env.getMasterConfiguration(), regionInfo)) {
|
||||
throw new IOException("Recovered.edits are found in Region: " + regionInfo
|
||||
+ ", abort merge to prevent data loss");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare merge and do some check
|
||||
* @param env MasterProcedureEnv
|
||||
|
|
|
@ -141,7 +141,7 @@ public class SplitTableRegionProcedure
|
|||
/**
|
||||
* Check whether there is recovered.edits in the closed region
|
||||
* If any, that means this region is not closed property, we need
|
||||
* to abort region merge to prevent data loss
|
||||
* to abort region split to prevent data loss
|
||||
* @param env master env
|
||||
* @throws IOException IOException
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
|
||||
import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
|
||||
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
|
||||
@Category({MasterTests.class, MediumTests.class})
|
||||
public class TestMergeTableRegionsWhileRSCrash {
|
||||
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestMergeTableRegionsWhileRSCrash.class);
|
||||
|
||||
private static final Logger LOG = LoggerFactory
|
||||
.getLogger(TestMergeTableRegionsWhileRSCrash.class);
|
||||
|
||||
protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||
private static TableName TABLE_NAME = TableName.valueOf("test");
|
||||
private static Admin admin;
|
||||
private static byte[] CF = Bytes.toBytes("cf");
|
||||
private static byte[] SPLITKEY = Bytes.toBytes("row5");
|
||||
private static CountDownLatch mergeCommitArrive = new CountDownLatch(1);
|
||||
private static Table TABLE;
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
UTIL.startMiniCluster(1);
|
||||
admin = UTIL.getHBaseAdmin();
|
||||
byte[][] splitKeys = new byte[1][];
|
||||
splitKeys[0] = SPLITKEY;
|
||||
TABLE = UTIL.createTable(TABLE_NAME, CF, splitKeys);
|
||||
UTIL.waitTableAvailable(TABLE_NAME);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void cleanupTest() throws Exception {
|
||||
try {
|
||||
UTIL.shutdownMiniCluster();
|
||||
} catch (Exception e) {
|
||||
LOG.warn("failure shutting down cluster", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() throws Exception {
|
||||
//write some rows to the table
|
||||
for (int i = 0; i < 10; i++) {
|
||||
byte[] row = Bytes.toBytes("row" + i);
|
||||
Put put = new Put(row);
|
||||
put.addColumn(CF, CF, CF);
|
||||
TABLE.put(put);
|
||||
}
|
||||
MasterProcedureEnv env = UTIL.getMiniHBaseCluster().getMaster()
|
||||
.getMasterProcedureExecutor().getEnvironment();
|
||||
final ProcedureExecutor<MasterProcedureEnv> executor = UTIL.getMiniHBaseCluster()
|
||||
.getMaster().getMasterProcedureExecutor();
|
||||
List<RegionInfo> regionInfos = admin.getRegions(TABLE_NAME);
|
||||
MergeTableRegionsProcedure mergeTableRegionsProcedure = new MergeTableRegionsProcedure(
|
||||
env, regionInfos.get(0), regionInfos.get(1));
|
||||
executor.submitProcedure(mergeTableRegionsProcedure);
|
||||
UTIL.waitFor(30000, () -> executor.getProcedures().stream()
|
||||
.filter(p -> p instanceof UnassignProcedure)
|
||||
.map(p -> (UnassignProcedure) p)
|
||||
.anyMatch(p -> TABLE_NAME.equals(p.getTableName())));
|
||||
UTIL.getMiniHBaseCluster().killRegionServer(
|
||||
UTIL.getMiniHBaseCluster().getRegionServer(0).getServerName());
|
||||
UTIL.getMiniHBaseCluster().startRegionServer();
|
||||
UTIL.waitUntilNoRegionsInTransition();
|
||||
Scan scan = new Scan();
|
||||
ResultScanner results = TABLE.getScanner(scan);
|
||||
int count = 0;
|
||||
Result result = null;
|
||||
while ((result = results.next()) != null) {
|
||||
count++;
|
||||
}
|
||||
Assert.assertEquals("There should be 10 rows!", 10, count);
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
Loading…
Reference in New Issue