SOLR-9847: Stop blocking further schema updates while waiting for a pending update to propagate to other replicas. This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates.

This commit is contained in:
Steve Rowe 2016-12-20 12:05:33 -05:00
parent b37a72d941
commit 04108d9935
2 changed files with 59 additions and 44 deletions

View File

@@ -271,6 +271,10 @@ Bug Fixes
* SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down.
(Karl Wright, Mark Miller)
* SOLR-9847: Stop blocking further schema updates while waiting for a pending update to propagate to other replicas.
This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates.
(Mark Miller, Steve Rowe)
Other Changes
----------------------

View File

@@ -88,9 +88,7 @@ public class SchemaManager {
IndexSchema schema = req.getCore().getLatestSchema(); IndexSchema schema = req.getCore().getLatestSchema();
if (schema instanceof ManagedIndexSchema && schema.isMutable()) { if (schema instanceof ManagedIndexSchema && schema.isMutable()) {
synchronized (schema.getSchemaUpdateLock()) {
return doOperations(ops); return doOperations(ops);
}
} else { } else {
return singletonList(singletonMap(CommandOperation.ERR_MSGS, "schema is not editable")); return singletonList(singletonMap(CommandOperation.ERR_MSGS, "schema is not editable"));
} }
@@ -107,6 +105,10 @@ public class SchemaManager {
TimeOut timeOut = new TimeOut(timeout, TimeUnit.SECONDS); TimeOut timeOut = new TimeOut(timeout, TimeUnit.SECONDS);
SolrCore core = req.getCore(); SolrCore core = req.getCore();
String errorMsg = "Unable to persist managed schema. "; String errorMsg = "Unable to persist managed schema. ";
List errors = Collections.emptyList();
int latestVersion = -1;
synchronized (req.getSchema().getSchemaUpdateLock()) {
while (!timeOut.hasTimedOut()) { while (!timeOut.hasTimedOut()) {
managedIndexSchema = getFreshManagedSchema(req.getCore()); managedIndexSchema = getFreshManagedSchema(req.getCore());
for (CommandOperation op : operations) { for (CommandOperation op : operations) {
@@ -117,8 +119,8 @@ public class SchemaManager {
op.addError("No such operation : " + op.name); op.addError("No such operation : " + op.name);
} }
} }
List errs = CommandOperation.captureErrors(operations); errors = CommandOperation.captureErrors(operations);
if (!errs.isEmpty()) return errs; if (!errors.isEmpty()) break;
SolrResourceLoader loader = req.getCore().getResourceLoader(); SolrResourceLoader loader = req.getCore().getResourceLoader();
if (loader instanceof ZkSolrResourceLoader) { if (loader instanceof ZkSolrResourceLoader) {
ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader; ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader;
@@ -131,11 +133,11 @@ public class SchemaManager {
} }
try { try {
int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(), latestVersion = ZkController.persistConfigResourceToZooKeeper
managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true); (zkLoader, managedIndexSchema.getSchemaZkVersion(), managedIndexSchema.getResourceName(),
sw.toString().getBytes(StandardCharsets.UTF_8), true);
req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName()); req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName());
waitForOtherReplicasToUpdate(timeOut, latestVersion); break;
return Collections.emptyList();
} catch (ZkController.ResourceModifiedInZkException e) { } catch (ZkController.ResourceModifiedInZkException e) {
log.info("Schema was modified by another node. Retrying.."); log.info("Schema was modified by another node. Retrying..");
} }
@@ -144,15 +146,24 @@ public class SchemaManager {
//only for non cloud stuff //only for non cloud stuff
managedIndexSchema.persistManagedSchema(false); managedIndexSchema.persistManagedSchema(false);
core.setLatestSchema(managedIndexSchema); core.setLatestSchema(managedIndexSchema);
return Collections.emptyList();
} catch (SolrException e) { } catch (SolrException e) {
log.warn(errorMsg); log.warn(errorMsg);
return singletonList(errorMsg + e.getMessage()); errors = singletonList(errorMsg + e.getMessage());
}
break;
} }
} }
} }
if (req.getCore().getResourceLoader() instanceof ZkSolrResourceLoader) {
// Don't block further schema updates while waiting for a pending update to propagate to other replicas.
// This reduces the likelihood of a (time-limited) distributed deadlock during concurrent schema updates.
waitForOtherReplicasToUpdate(timeOut, latestVersion);
}
if (errors.isEmpty() && timeOut.hasTimedOut()) {
log.warn(errorMsg + "Timed out."); log.warn(errorMsg + "Timed out.");
return singletonList(errorMsg + "Timed out."); errors = singletonList(errorMsg + "Timed out.");
}
return errors;
} }
private void waitForOtherReplicasToUpdate(TimeOut timeOut, int latestVersion) { private void waitForOtherReplicasToUpdate(TimeOut timeOut, int latestVersion) {