HBASE-26120 New replication gets stuck or loses data when there are more than 10 multiwal groups (#3528)

Signed-off-by: Andrew Purtell <apurtell@apache.org>
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Duo Zhang 2021-07-27 23:14:15 +08:00 committed by GitHub
parent 20a4aaedcc
commit dd22fecc2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 21 deletions

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -66,6 +65,7 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
@ -154,7 +154,7 @@ public class ReplicationSourceManager {
private final Configuration conf;
private final FileSystem fs;
// The path to the latest log of each wal group, for newly added peers
private final Set<Path> latestPaths;
private final Map<String, Path> latestPaths;
// Path to the wals directories
private final Path logDir;
// Path to the wal archive
@ -225,7 +225,7 @@ public class ReplicationSourceManager {
tfb.setNameFormat("ReplicationExecutor-%d");
tfb.setDaemon(true);
this.executor.setThreadFactory(tfb.build());
this.latestPaths = new HashSet<Path>();
this.latestPaths = new HashMap<>();
replicationForBulkLoadDataEnabled = conf.getBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY,
HConstants.REPLICATION_BULKLOAD_ENABLE_DEFAULT);
this.totalBufferLimit = conf.getLong(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY,
@ -345,17 +345,17 @@ public class ReplicationSourceManager {
Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
this.walsById.put(peerId, walsByGroup);
// Add the latest wal to that source's queue
if (this.latestPaths.size() > 0) {
for (Path logPath : latestPaths) {
String name = logPath.getName();
String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(name);
NavigableSet<String> logs = new TreeSet<>();
logs.add(name);
walsByGroup.put(walPrefix, logs);
if (!latestPaths.isEmpty()) {
for (Map.Entry<String, Path> walPrefixAndPath : latestPaths.entrySet()) {
Path walPath = walPrefixAndPath.getValue();
NavigableSet<String> wals = new TreeSet<>();
wals.add(walPath.getName());
walsByGroup.put(walPrefixAndPath.getKey(), wals);
// Abort the RS and throw an exception so that adding the peer fails
abortAndThrowIOExceptionWhenFail(
() -> this.queueStorage.addWAL(server.getServerName(), peerId, name));
src.enqueueLog(logPath);
() -> this.queueStorage.addWAL(server.getServerName(), peerId, walPath.getName()));
src.enqueueLog(walPath);
LOG.trace("Enqueued {} to source {} during source creation.", walPath, src.getQueueId());
}
}
}
@ -606,15 +606,7 @@ public class ReplicationSourceManager {
}
// Add to latestPaths
Iterator<Path> iterator = latestPaths.iterator();
while (iterator.hasNext()) {
Path path = iterator.next();
if (path.getName().contains(logPrefix)) {
iterator.remove();
break;
}
}
this.latestPaths.add(newLog);
latestPaths.put(logPrefix, newLog);
}
}
@ -795,6 +787,12 @@ public class ReplicationSourceManager {
}
}
/**
 * Returns a snapshot of the paths currently held as values of {@code latestPaths}, i.e. the
 * latest log recorded for each wal group.
 * <p>
 * The copy is made while holding the monitor of {@code latestPaths}; the returned set is a new
 * set that is independent of the internal map.
 * @return a newly created set containing the current values of {@code latestPaths}
 */
Set<Path> getLastestPath() {
  Set<Path> snapshot;
  synchronized (latestPaths) {
    // Copy under the lock so the caller never iterates over the live map view.
    snapshot = Sets.newHashSet(latestPaths.values());
  }
  return snapshot;
}
public AtomicLong getTotalBufferUsed() {
return totalBufferUsed;
}

View File

@ -640,6 +640,22 @@ public abstract class TestReplicationSourceManager {
});
}
/**
 * Rolls two WALs whose names both start with {@code localhost,8080,12345} and checks that, of
 * the paths that were not already tracked before the rolls, exactly these two are returned by
 * {@code getLastestPath()}.
 */
@Test
public void testSameWALPrefix() throws IOException {
  // Names tracked before this test rolls anything; they are excluded from the check below.
  Set<String> namesBeforeRoll = manager.getLastestPath().stream().map(p -> p.getName())
    .collect(Collectors.toSet());

  String peerWalName = "localhost,8080,12345-45678-Peer.34567";
  String plainWalName = "localhost,8080,12345.56789";
  manager.preLogRoll(new Path(peerWalName));
  manager.preLogRoll(new Path(plainWalName));

  // Only keep the names that showed up as a result of the two rolls above.
  Set<String> namesAddedByRoll = manager.getLastestPath().stream().map(p -> p.getName())
    .filter(name -> !namesBeforeRoll.contains(name)).collect(Collectors.toSet());

  assertEquals(2, namesAddedByRoll.size());
  assertTrue(namesAddedByRoll.contains(peerWalName));
  assertTrue(namesAddedByRoll.contains(plainWalName));
}
private WALEdit getBulkLoadWALEdit(NavigableMap<byte[], Integer> scope) {
// 1. Create store files for the families
Map<byte[], List<Path>> storeFiles = new HashMap<>(1);