HBASE-24871 Replication may loss data when refresh recovered replication sources (#2249)
Signed-off-by: huaxiangsun <huaxiangsun@apache.org> Signed-off-by: Guanghao Zhang <zghao@apache.org> Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
This commit is contained in:
parent
836c04d356
commit
0e63b12648
|
@ -417,17 +417,12 @@ public class ReplicationSourceManager implements ReplicationListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (String queueId : previousQueueIds) {
|
for (String queueId : previousQueueIds) {
|
||||||
ReplicationSourceInterface replicationSource = createSource(queueId, peer);
|
ReplicationSourceInterface recoveredReplicationSource = createSource(queueId, peer);
|
||||||
this.oldsources.add(replicationSource);
|
this.oldsources.add(recoveredReplicationSource);
|
||||||
LOG.trace("Added source for recovered queue: " + src.getQueueId());
|
|
||||||
for (SortedSet<String> walsByGroup : walsByIdRecoveredQueues.get(queueId).values()) {
|
for (SortedSet<String> walsByGroup : walsByIdRecoveredQueues.get(queueId).values()) {
|
||||||
walsByGroup.forEach(wal -> {
|
walsByGroup.forEach(wal -> recoveredReplicationSource.enqueueLog(new Path(wal)));
|
||||||
LOG.trace("Enqueueing log from recovered queue for source: {}",
|
|
||||||
src.getQueueId());
|
|
||||||
src.enqueueLog(new Path(wal));
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
toStartup.add(replicationSource);
|
toStartup.add(recoveredReplicationSource);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (ReplicationSourceInterface replicationSource : toStartup) {
|
for (ReplicationSourceInterface replicationSource : toStartup) {
|
||||||
|
|
|
@ -77,7 +77,7 @@ public class TestReplicationBase {
|
||||||
protected static Configuration CONF1 = UTIL1.getConfiguration();
|
protected static Configuration CONF1 = UTIL1.getConfiguration();
|
||||||
protected static Configuration CONF2 = UTIL2.getConfiguration();
|
protected static Configuration CONF2 = UTIL2.getConfiguration();
|
||||||
|
|
||||||
protected static final int NUM_SLAVES1 = 1;
|
protected static int NUM_SLAVES1 = 1;
|
||||||
protected static final int NUM_SLAVES2 = 1;
|
protected static final int NUM_SLAVES2 = 1;
|
||||||
protected static final int NB_ROWS_IN_BATCH = 100;
|
protected static final int NB_ROWS_IN_BATCH = 100;
|
||||||
protected static final int NB_ROWS_IN_BIG_BATCH =
|
protected static final int NB_ROWS_IN_BIG_BATCH =
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.replication.regionserver;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
||||||
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||||
|
import org.apache.hadoop.hbase.client.Scan;
|
||||||
|
import org.apache.hadoop.hbase.client.Table;
|
||||||
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
|
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||||
|
import org.apache.hadoop.hbase.replication.TestReplicationBase;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.ReplicationTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for HBASE-24871.
|
||||||
|
*/
|
||||||
|
@Category({ ReplicationTests.class, MediumTests.class })
|
||||||
|
public class TestRefreshRecoveredReplication extends TestReplicationBase {
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestRefreshRecoveredReplication.class);
|
||||||
|
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(TestRefreshRecoveredReplication.class);
|
||||||
|
|
||||||
|
private static final int BATCH = 50;
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TestName name = new TestName();
|
||||||
|
|
||||||
|
private TableName tablename;
|
||||||
|
private Table table1;
|
||||||
|
private Table table2;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
NUM_SLAVES1 = 2;
|
||||||
|
// replicate slowly
|
||||||
|
Configuration conf1 = UTIL1.getConfiguration();
|
||||||
|
conf1.setInt(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY, 100);
|
||||||
|
TestReplicationBase.setUpBeforeClass();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownAfterClass() throws Exception {
|
||||||
|
TestReplicationBase.tearDownAfterClass();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws Exception {
|
||||||
|
setUpBase();
|
||||||
|
|
||||||
|
tablename = TableName.valueOf(name.getMethodName());
|
||||||
|
TableDescriptor table = TableDescriptorBuilder.newBuilder(tablename)
|
||||||
|
.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(famName)
|
||||||
|
.setScope(HConstants.REPLICATION_SCOPE_GLOBAL).build())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
UTIL1.getAdmin().createTable(table);
|
||||||
|
UTIL2.getAdmin().createTable(table);
|
||||||
|
UTIL1.waitTableAvailable(tablename);
|
||||||
|
UTIL2.waitTableAvailable(tablename);
|
||||||
|
table1 = UTIL1.getConnection().getTable(tablename);
|
||||||
|
table2 = UTIL2.getConnection().getTable(tablename);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void teardown() throws Exception {
|
||||||
|
tearDownBase();
|
||||||
|
|
||||||
|
UTIL1.deleteTableIfAny(tablename);
|
||||||
|
UTIL2.deleteTableIfAny(tablename);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReplicationRefreshSource() throws Exception {
|
||||||
|
// put some data
|
||||||
|
for (int i = 0; i < BATCH; i++) {
|
||||||
|
byte[] r = Bytes.toBytes(i);
|
||||||
|
table1.put(new Put(r).addColumn(famName, famName, r));
|
||||||
|
}
|
||||||
|
|
||||||
|
// kill rs holding table region
|
||||||
|
Optional<RegionServerThread> server = UTIL1.getMiniHBaseCluster().getLiveRegionServerThreads()
|
||||||
|
.stream()
|
||||||
|
.filter(rst -> CollectionUtils.isNotEmpty(rst.getRegionServer().getRegions(tablename)))
|
||||||
|
.findAny();
|
||||||
|
Assert.assertTrue(server.isPresent());
|
||||||
|
server.get().getRegionServer().abort("stopping for test");
|
||||||
|
UTIL1.waitFor(60000, () ->
|
||||||
|
UTIL1.getMiniHBaseCluster().getLiveRegionServerThreads().size() == NUM_SLAVES1 - 1);
|
||||||
|
UTIL1.waitTableAvailable(tablename);
|
||||||
|
|
||||||
|
// waiting for recovered peer to start
|
||||||
|
Replication replication = (Replication) UTIL1.getMiniHBaseCluster()
|
||||||
|
.getLiveRegionServerThreads().get(0).getRegionServer().getReplicationSourceService();
|
||||||
|
UTIL1.waitFor(60000, () ->
|
||||||
|
!replication.getReplicationManager().getOldSources().isEmpty());
|
||||||
|
|
||||||
|
// disable peer to trigger refreshSources
|
||||||
|
hbaseAdmin.disableReplicationPeer(PEER_ID2);
|
||||||
|
LOG.info("has replicated {} rows before disable peer", checkReplicationData());
|
||||||
|
hbaseAdmin.enableReplicationPeer(PEER_ID2);
|
||||||
|
// waiting to replicate all data to slave
|
||||||
|
UTIL2.waitFor(60000, () -> {
|
||||||
|
int count = checkReplicationData();
|
||||||
|
LOG.info("Waiting all logs pushed to slave. Expected {} , actual {}", BATCH, count);
|
||||||
|
return count == BATCH;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private int checkReplicationData() throws IOException {
|
||||||
|
int count = 0;
|
||||||
|
ResultScanner results = table2.getScanner(new Scan().setCaching(BATCH));
|
||||||
|
for (Result r : results) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue