mirror of https://github.com/apache/lucene.git
SOLR-11293: Potential data loss in TLOG replicas after replication failures
This commit is contained in:
parent
913a2c4345
commit
1d31370577
|
@ -116,6 +116,8 @@ Bug Fixes
|
|||
|
||||
* SOLR-11278: Stopping CDCR should cancel a running bootstrap operation. (Amrit Sarkar, shalin)
|
||||
|
||||
* SOLR-11293: Potential data loss in TLOG replicas after replication failures (noble)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -89,7 +89,6 @@ public class ReplicateFromLeader {
|
|||
if (pollSuccess) {
|
||||
String commitVersion = getCommitVersion(core);
|
||||
if (commitVersion == null) return;
|
||||
if (Long.parseLong(commitVersion) == lastVersion) return;
|
||||
UpdateLog updateLog = solrCore.getUpdateHandler().getUpdateLog();
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest(core,
|
||||
new ModifiableSolrParams());
|
||||
|
|
|
@ -989,7 +989,7 @@ public class ZkController {
|
|||
if (isTlogReplicaAndNotLeader) {
|
||||
String commitVersion = ReplicateFromLeader.getCommitVersion(core);
|
||||
if (commitVersion != null) {
|
||||
ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
|
||||
ulog.copyOverOldUpdates(Long.parseLong(commitVersion), true);
|
||||
}
|
||||
}
|
||||
// we will call register again after zk expiration and on reload
|
||||
|
|
|
@ -1158,10 +1158,12 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
|
|||
|
||||
protected void copyAndSwitchToNewTlog(CommitUpdateCommand cuc) {
|
||||
synchronized (this) {
|
||||
if (tlog == null) return;
|
||||
if (tlog == null && prevTlog == null && prevMapLog2 == null && logs.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
preCommit(cuc);
|
||||
try {
|
||||
copyOverOldUpdates(cuc.getVersion());
|
||||
copyOverOldUpdates(cuc.getVersion(), false);
|
||||
} finally {
|
||||
postCommit(cuc);
|
||||
}
|
||||
|
@ -1171,8 +1173,9 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
|
|||
/**
|
||||
* Copy over updates from prevTlog or last tlog (in tlog folder) to a new tlog
|
||||
* @param commitVersion updates whose version is larger than the commitVersion will be copied over
|
||||
* @param omitCommitted if true, do not read the old tlog when it already ends with a commit
|
||||
*/
|
||||
public void copyOverOldUpdates(long commitVersion) {
|
||||
public void copyOverOldUpdates(long commitVersion, boolean omitCommitted) {
|
||||
TransactionLog oldTlog = prevTlog;
|
||||
if (oldTlog == null && !logs.isEmpty()) {
|
||||
oldTlog = logs.getFirst();
|
||||
|
@ -1182,9 +1185,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
|
|||
}
|
||||
|
||||
try {
|
||||
if (oldTlog.endsWithCommit()) {
|
||||
return;
|
||||
}
|
||||
if (omitCommitted && oldTlog.endsWithCommit()) return;
|
||||
} catch (IOException e) {
|
||||
log.warn("Exception reading log", e);
|
||||
return;
|
||||
|
|
|
@ -30,7 +30,6 @@ import java.util.Properties;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.JSONTestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
|
||||
|
@ -66,7 +65,6 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
@Slow
|
||||
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11293")
|
||||
public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
@ -222,6 +220,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
|
||||
|
||||
// Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
|
||||
log.info("partitioning replica : " + notLeaders.get(0));
|
||||
SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
|
||||
|
||||
proxy0.close();
|
||||
|
@ -241,6 +240,7 @@ public class HttpPartitionTest extends AbstractFullDistribZkTestBase {
|
|||
Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
|
||||
assertEquals("The partitioned replica did not get marked down",
|
||||
Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
|
||||
log.info("un-partitioning replica : " + notLeaders.get(0));
|
||||
|
||||
proxy0.reopen();
|
||||
|
||||
|
|
Loading…
Reference in New Issue