SOLR-10279: The autoAddReplica feature can result in SolrCores being assigned new shards when using legacyCloud=false and will also fail on a state check when taking over a core registration with a new core.
parent 83772c6f99
commit cab7e1a3d7
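
In short, the fix carries the failed replica's original shard id through to the core-create request so the replacement SolrCore rejoins its old shard instead of being handed a new one, and it relaxes a ZkController state check that rejected a core re-registering from a new node. A minimal sketch of the repaired create path, assembled from the hunks below (the wrapper class, method, and setCollection call are illustrative, not verbatim from the patch):

// Sketch only: why shardId must travel with the create command. Without
// setShardId(...), a cluster running legacyCloud=false may assign the
// recreated core to a brand-new shard, which is the bug this commit fixes.
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CoreAdminRequest;

class AutoAddReplicaSketch {
  static void recreateCore(String createUrl, String collection, String shardId,
      String coreNodeName, String coreName, String dataDir, String ulogDir) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder(createUrl).build()) {
      CoreAdminRequest.Create createCmd = new CoreAdminRequest.Create();
      createCmd.setCollection(collection);
      createCmd.setCoreNodeName(coreNodeName); // take over the failed replica's registration
      createCmd.setShardId(shardId);           // the key addition: pin the core to its original shard
      createCmd.setCoreName(coreName);
      createCmd.setDataDir(dataDir);           // shared-FS data survives the lost node
      createCmd.setUlogDir(ulogDir.substring(0, ulogDir.length() - "/tlog".length()));
      client.request(createCmd);
    }
  }
}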
@@ -243,6 +243,10 @@ Bug Fixes
 
 * SOLR-10269: MetricsHandler JSON output incorrect. (ab)
 
+* SOLR-10279: The autoAddReplica feature can result in SolrCores being assigned new shards when using
+  legacyCloud=false and will also fail on a state check when taking over a core registration with a new
+  core. (Mark Miller, Hrishikesh Gadre, Patrick Dvorack)
+
 Optimizations
 ----------------------
@@ -243,13 +243,14 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
     final String dataDir = badReplica.replica.getStr("dataDir");
     final String ulogDir = badReplica.replica.getStr("ulogDir");
     final String coreNodeName = badReplica.replica.getName();
+    final String shardId = badReplica.slice.getName();
     if (dataDir != null) {
       // need an async request - full shard goes down leader election
       final String coreName = badReplica.replica.getStr(ZkStateReader.CORE_NAME_PROP);
       log.debug("submit call to {}", createUrl);
       MDC.put("OverseerAutoReplicaFailoverThread.createUrl", createUrl);
       try {
-        updateExecutor.submit(() -> createSolrCore(collection, createUrl, dataDir, ulogDir, coreNodeName, coreName));
+        updateExecutor.submit(() -> createSolrCore(collection, createUrl, dataDir, ulogDir, coreNodeName, coreName, shardId));
       } finally {
         MDC.remove("OverseerAutoReplicaFailoverThread.createUrl");
       }
@@ -440,7 +441,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
 
   private boolean createSolrCore(final String collection,
       final String createUrl, final String dataDir, final String ulogDir,
-      final String coreNodeName, final String coreName) {
+      final String coreNodeName, final String coreName, final String shardId) {
 
     try (HttpSolrClient client = new HttpSolrClient.Builder(createUrl).build()) {
       log.debug("create url={}", createUrl);
@@ -451,6 +452,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
       createCmd.setCoreNodeName(coreNodeName);
       // TODO: how do we ensure unique coreName
       // for now, the collections API will use unique names
+      createCmd.setShardId(shardId);
       createCmd.setCoreName(coreName);
       createCmd.setDataDir(dataDir);
       createCmd.setUlogDir(ulogDir.substring(0, ulogDir.length() - "/tlog".length()));
@@ -1449,13 +1449,7 @@ public class ZkController {
           errorMessage.set("coreNodeName " + coreNodeName + " does not exist in shard " + cloudDesc.getShardId());
           return false;
         }
-        String baseUrl = replica.getStr(BASE_URL_PROP);
-        String coreName = replica.getStr(CORE_NAME_PROP);
-        if (baseUrl.equals(this.baseURL) && coreName.equals(cd.getName())) {
-          return true;
-        }
-        errorMessage.set("coreNodeName " + coreNodeName + " exists, but does not match expected node or core name");
-        return false;
+        return true;
       });
     } catch (TimeoutException e) {
       String error = errorMessage.get();
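
The ZkController hunk above relaxes the pre-registration wait: it is now enough that the coreNodeName exists in the expected shard. After an autoAddReplica failover the core legitimately re-registers from a different node, so the old baseUrl/coreName comparison would fail exactly when the feature works as intended. A hedged sketch of the relaxed check as a standalone helper (in the patch this logic lives inside a wait predicate, not a separate class):

// Illustrative only: keep the "does the coreNodeName exist in this shard" guard,
// drop the "is it registered to my baseUrl/coreName" guard.
import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

class RegistrationCheckSketch {
  static boolean coreNodeExists(DocCollection state, String shardId, String coreNodeName,
      AtomicReference<String> errorMessage) {
    Slice slice = state.getSlice(shardId);
    Replica replica = slice == null ? null : slice.getReplica(coreNodeName);
    if (replica == null) {
      errorMessage.set("coreNodeName " + coreNodeName + " does not exist in shard " + shardId);
      return false;
    }
    return true; // previously this also required matching baseUrl and core name
  }
}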
@@ -38,6 +38,7 @@ import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest.Create;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
@@ -103,6 +104,11 @@ public class SharedFSAutoReplicaFailoverTest extends AbstractFullDistribZkTestBase {
   public void setUp() throws Exception {
     super.setUp();
     collectionUlogDirMap.clear();
+    if (random().nextBoolean()) {
+      CollectionAdminRequest.setClusterProperty("legacyCloud", "false").process(cloudClient);
+    } else {
+      CollectionAdminRequest.setClusterProperty("legacyCloud", "true").process(cloudClient);
+    }
   }
 
   @Override
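
The test setup now randomizes the legacyCloud cluster property so both code paths are exercised on every run. For reference, the same SolrJ call can flip a live cluster into the non-legacy mode that exposed this bug (a usage sketch; cloudClient is assumed to be a connected CloudSolrClient):

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

class ClusterPropSketch {
  // Same ClusterProp request the test issues in setUp().
  static void disableLegacyCloud(CloudSolrClient cloudClient) throws Exception {
    CollectionAdminRequest.setClusterProperty("legacyCloud", "false").process(cloudClient);
  }
}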
@@ -313,6 +319,29 @@ public class SharedFSAutoReplicaFailoverTest extends AbstractFullDistribZkTestBase {
     assertSliceAndReplicaCount(collection1);
 
     assertUlogDir(collections);
+
+    // restart all to test core saved state
+
+    ChaosMonkey.stop(jettys);
+    ChaosMonkey.stop(controlJetty);
+
+    assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
+
+    ChaosMonkey.start(jettys);
+    ChaosMonkey.start(controlJetty);
+
+    assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), collection1, 120000));
+
+    assertSliceAndReplicaCount(collection1);
+
+    assertUlogDir(collections);
+
+    assertSliceAndReplicaCount(collection1);
+    assertSingleReplicationAndShardSize(collection3, 5);
+
+    // all docs should be queried
+    assertSingleReplicationAndShardSize(collection4, 5);
+    queryAndAssertResultSize(collection4, numDocs, 10000);
   }
 
   private void queryAndAssertResultSize(String collection, int expectedResultSize, int timeoutMS)
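
The restart sequence added above checks the saved core state: after a full stop/start cycle every core must rejoin its original shard from its locally persisted descriptor rather than being reassigned. A compact sketch of the wait-then-assert pattern, using the same ClusterStateUtil helpers the test calls (class and method names are illustrative):

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.ClusterStateUtil;

class RestartWaitSketch {
  // The stop/start itself happens elsewhere (the test uses ChaosMonkey);
  // this shows only the two cluster-state waits that bracket it.
  static void awaitFullRecovery(CloudSolrClient cloudClient, String collection) {
    if (!ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000)) {
      throw new IllegalStateException("Timeout waiting for all not live");
    }
    if (!ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), collection, 120000)) {
      throw new IllegalStateException("Timeout waiting for all live and active");
    }
  }
}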