SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)

This commit is contained in:
Erick Erickson 2020-10-31 19:34:09 -04:00
parent e1698bda95
commit 2c49c4a27d
3 changed files with 179 additions and 68 deletions

View File

@ -167,6 +167,9 @@ Bug Fixes
* SOLR-14940: ReplicationHandler memory leak through SolrCore.closeHooks with unstable ZK connection. (Anver Sotnikov, Mike Drob)
* SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
(Erick Erickson, Andreas Hubold)
Other Changes
---------------------

View File

@ -30,6 +30,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
@ -1244,6 +1245,7 @@ public class CoreContainer {
return create(coreName, cfg.getCoreRootDirectory().resolve(coreName), parameters, false);
}
Set<String> inFlightCreations = new HashSet<>(); // See SOLR-14969
/**
* Creates a new core in a specified instance directory, publishing the core state to the cluster
*
@ -1253,77 +1255,93 @@ public class CoreContainer {
* @return the newly created core
*/
public SolrCore create(String coreName, Path instancePath, Map<String, String> parameters, boolean newCollection) {
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
// TODO: There's a race here, isn't there?
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
if (getAllCoreNames().contains(coreName)) {
log.warn("Creating a core with existing name is not allowed");
// TODO: Shouldn't this be a BAD_REQUEST?
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
}
// Validate paths are relative to known locations to avoid path traversal
assertPathAllowed(cd.getInstanceDir());
assertPathAllowed(Paths.get(cd.getDataDir()));
boolean preExisitingZkEntry = false;
SolrCore core = null;
boolean iAdded = false;
try {
if (getZkController() != null) {
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "coreNodeName missing " + parameters.toString());
synchronized (inFlightCreations) {
if (inFlightCreations.add(coreName)) {
iAdded = true;
} else {
String msg = "Already creating a core with name '" + coreName + "', call aborted '";
log.warn(msg);
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
}
preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
}
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
if (getAllCoreNames().contains(coreName)) {
log.warn("Creating a core with existing name is not allowed: '{}'", coreName);
// TODO: Shouldn't this be a BAD_REQUEST?
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
}
// Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
// first and clean it up if there's an error.
coresLocator.create(this, cd);
// Validate paths are relative to known locations to avoid path traversal
assertPathAllowed(cd.getInstanceDir());
assertPathAllowed(Paths.get(cd.getDataDir()));
SolrCore core = null;
boolean preExisitingZkEntry = false;
try {
solrCores.waitAddPendingCoreOps(cd.getName());
core = createFromDescriptor(cd, true, newCollection);
coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
} finally {
solrCores.removeFromPendingOps(cd.getName());
}
if (getZkController() != null) {
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "coreNodeName missing " + parameters.toString());
}
preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
}
// Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
// first and clean it up if there's an error.
coresLocator.create(this, cd);
return core;
} catch (Exception ex) {
// First clean up any core descriptor, there should never be an existing core.properties file for any core that
// failed to be created on-the-fly.
coresLocator.delete(this, cd);
if (isZooKeeperAware() && !preExisitingZkEntry) {
try {
getZkController().unregister(coreName, cd);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
SolrException.log(log, null, e);
} catch (KeeperException e) {
SolrException.log(log, null, e);
} catch (Exception e) {
SolrException.log(log, null, e);
solrCores.waitAddPendingCoreOps(cd.getName());
core = createFromDescriptor(cd, true, newCollection);
coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
} finally {
solrCores.removeFromPendingOps(cd.getName());
}
return core;
} catch (Exception ex) {
// First clean up any core descriptor, there should never be an existing core.properties file for any core that
// failed to be created on-the-fly.
coresLocator.delete(this, cd);
if (isZooKeeperAware() && !preExisitingZkEntry) {
try {
getZkController().unregister(coreName, cd);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
SolrException.log(log, null, e);
} catch (KeeperException e) {
SolrException.log(log, null, e);
} catch (Exception e) {
SolrException.log(log, null, e);
}
}
Throwable tc = ex;
Throwable c = null;
do {
tc = tc.getCause();
if (tc != null) {
c = tc;
}
} while (tc != null);
String rootMsg = "";
if (c != null) {
rootMsg = " Caused by: " + c.getMessage();
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
}
} finally {
synchronized (inFlightCreations) {
if (iAdded) {
inFlightCreations.remove(coreName);
}
}
Throwable tc = ex;
Throwable c = null;
do {
tc = tc.getCause();
if (tc != null) {
c = tc;
}
} while (tc != null);
String rootMsg = "";
if (c != null) {
rootMsg = " Caused by: " + c.getMessage();
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
}
}
@ -1777,6 +1795,7 @@ public class CoreContainer {
}
if (cd == null) {
log.warn("Cannot unload non-existent core '{}'", name);
throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]");
}

View File

@ -20,6 +20,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -117,22 +118,27 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
}
}
private static class TestReloadThread extends Thread {
private final CoreContainer cc;
TestReloadThread(CoreContainer cc) {
this.cc = cc;
}
@Override
public void run() {
cc.reload("core1");
}
}
@Test
public void testReloadThreaded() throws Exception {
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
cc.create("core1", ImmutableMap.of("configSet", "minimal"));
class TestThread extends Thread {
@Override
public void run() {
cc.reload("core1");
}
}
List<Thread> threads = new ArrayList<>();
int numThreads = 4;
for (int i = 0; i < numThreads; i++) {
threads.add(new TestThread());
threads.add(new TestReloadThread(cc));
}
for (Thread thread : threads) {
@ -147,6 +153,89 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
}
private static class TestCreateThread extends Thread {
final CoreContainer cc;
final String coreName;
boolean foundExpectedError = false;
SolrCore core = null;
TestCreateThread(CoreContainer cc, String coreName) {
this.cc = cc;
this.coreName = coreName;
}
@Override
public void run() {
try {
core = cc.create(coreName, ImmutableMap.of("configSet", "minimal"));
} catch (SolrException e) {
String msg = e.getMessage();
foundExpectedError = msg.contains("Already creating a core with name") || msg.contains("already exists");
}
}
int verifyMe() {
if (foundExpectedError) {
assertNull("failed create should have returned null for core", core);
return 0;
} else {
assertNotNull("Core should not be null if there was no error", core);
return 1;
}
}
}
@Test
public void testCreateThreaded() throws Exception {
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
final int NUM_THREADS = 3;
// Try this a few times to increase the chances of failure.
for (int idx = 0; idx < 3; ++idx) {
TestCreateThread[] threads = new TestCreateThread[NUM_THREADS];
// Let's add a little stress in here by using the same core name each
// time around. The final unload in the loop should delete the core and
// allow the next time around to succeed.
// This also checks the bookkeeping in CoreContainer.create
// that prevents muliple simulatneous creations,
// currently "inFlightCreations"
String testName = "coreToTest";
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread] = new TestCreateThread(cc, testName);
}
// only one of these should succeed.
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread].start();
}
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread].join();
}
// We can't guarantee that which thread failed, so go find it
int goodCount = 0;
for (int thread = 0; thread < NUM_THREADS; ++thread) {
goodCount += threads[thread].verifyMe();
}
assertEquals("Only one create should have succeeded", 1, goodCount);
// Check bookkeeping by removing and creating the core again, making sure
// we didn't leave the record of trying to create this core around.
// NOTE: unloading the core closes it too.
cc.unload(testName, true, true, true);
cc.create(testName, ImmutableMap.of("configSet", "minimal"));
// This call should fail with a different error because the core was
// created successfully.
SolrException thrown = expectThrows(SolrException.class, () ->
cc.create(testName, ImmutableMap.of("configSet", "minimal")));
assertTrue("Should have 'already exists' error", thrown.getMessage().contains("already exists"));
cc.unload(testName, true, true, true);
}
cc.shutdown();
}
@Test
public void testNoCores() throws Exception {