SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)

This commit is contained in:
Erick Erickson 2020-10-31 19:34:09 -04:00
parent e1698bda95
commit 2c49c4a27d
3 changed files with 179 additions and 68 deletions

View File

@ -167,6 +167,9 @@ Bug Fixes
* SOLR-14940: ReplicationHandler memory leak through SolrCore.closeHooks with unstable ZK connection. (Anver Sotnikov, Mike Drob) * SOLR-14940: ReplicationHandler memory leak through SolrCore.closeHooks with unstable ZK connection. (Anver Sotnikov, Mike Drob)
* SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
(Erick Erickson, Andreas Hubold)
Other Changes Other Changes
--------------------- ---------------------

View File

@ -30,6 +30,7 @@ import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -1244,6 +1245,7 @@ public class CoreContainer {
return create(coreName, cfg.getCoreRootDirectory().resolve(coreName), parameters, false); return create(coreName, cfg.getCoreRootDirectory().resolve(coreName), parameters, false);
} }
Set<String> inFlightCreations = new HashSet<>(); // See SOLR-14969
/** /**
* Creates a new core in a specified instance directory, publishing the core state to the cluster * Creates a new core in a specified instance directory, publishing the core state to the cluster
* *
@ -1253,13 +1255,23 @@ public class CoreContainer {
* @return the newly created core * @return the newly created core
*/ */
public SolrCore create(String coreName, Path instancePath, Map<String, String> parameters, boolean newCollection) { public SolrCore create(String coreName, Path instancePath, Map<String, String> parameters, boolean newCollection) {
SolrCore core = null;
boolean iAdded = false;
try {
synchronized (inFlightCreations) {
if (inFlightCreations.add(coreName)) {
iAdded = true;
} else {
String msg = "Already creating a core with name '" + coreName + "', call aborted '";
log.warn(msg);
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
}
}
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController()); CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
// TODO: There's a race here, isn't there?
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created. // Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
if (getAllCoreNames().contains(coreName)) { if (getAllCoreNames().contains(coreName)) {
log.warn("Creating a core with existing name is not allowed"); log.warn("Creating a core with existing name is not allowed: '{}'", coreName);
// TODO: Shouldn't this be a BAD_REQUEST? // TODO: Shouldn't this be a BAD_REQUEST?
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists."); throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
} }
@ -1281,7 +1293,6 @@ public class CoreContainer {
// first and clean it up if there's an error. // first and clean it up if there's an error.
coresLocator.create(this, cd); coresLocator.create(this, cd);
SolrCore core = null;
try { try {
solrCores.waitAddPendingCoreOps(cd.getName()); solrCores.waitAddPendingCoreOps(cd.getName());
core = createFromDescriptor(cd, true, newCollection); core = createFromDescriptor(cd, true, newCollection);
@ -1325,6 +1336,13 @@ public class CoreContainer {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex); "Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
} }
} finally {
synchronized (inFlightCreations) {
if (iAdded) {
inFlightCreations.remove(coreName);
}
}
}
} }
/** /**
@ -1777,6 +1795,7 @@ public class CoreContainer {
} }
if (cd == null) { if (cd == null) {
log.warn("Cannot unload non-existent core '{}'", name);
throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]"); throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]");
} }

View File

@ -20,6 +20,7 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -117,22 +118,27 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
} }
} }
@Test private static class TestReloadThread extends Thread {
public void testReloadThreaded() throws Exception { private final CoreContainer cc;
final CoreContainer cc = init(CONFIGSETS_SOLR_XML); TestReloadThread(CoreContainer cc) {
cc.create("core1", ImmutableMap.of("configSet", "minimal")); this.cc = cc;
}
class TestThread extends Thread {
@Override @Override
public void run() { public void run() {
cc.reload("core1"); cc.reload("core1");
} }
} }
@Test
public void testReloadThreaded() throws Exception {
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
cc.create("core1", ImmutableMap.of("configSet", "minimal"));
List<Thread> threads = new ArrayList<>(); List<Thread> threads = new ArrayList<>();
int numThreads = 4; int numThreads = 4;
for (int i = 0; i < numThreads; i++) { for (int i = 0; i < numThreads; i++) {
threads.add(new TestThread()); threads.add(new TestReloadThread(cc));
} }
for (Thread thread : threads) { for (Thread thread : threads) {
@ -147,6 +153,89 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
} }
private static class TestCreateThread extends Thread {
final CoreContainer cc;
final String coreName;
boolean foundExpectedError = false;
SolrCore core = null;
TestCreateThread(CoreContainer cc, String coreName) {
this.cc = cc;
this.coreName = coreName;
}
@Override
public void run() {
try {
core = cc.create(coreName, ImmutableMap.of("configSet", "minimal"));
} catch (SolrException e) {
String msg = e.getMessage();
foundExpectedError = msg.contains("Already creating a core with name") || msg.contains("already exists");
}
}
int verifyMe() {
if (foundExpectedError) {
assertNull("failed create should have returned null for core", core);
return 0;
} else {
assertNotNull("Core should not be null if there was no error", core);
return 1;
}
}
}
@Test
public void testCreateThreaded() throws Exception {
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
final int NUM_THREADS = 3;
// Try this a few times to increase the chances of failure.
for (int idx = 0; idx < 3; ++idx) {
TestCreateThread[] threads = new TestCreateThread[NUM_THREADS];
// Let's add a little stress in here by using the same core name each
// time around. The final unload in the loop should delete the core and
// allow the next time around to succeed.
// This also checks the bookkeeping in CoreContainer.create
// that prevents muliple simulatneous creations,
// currently "inFlightCreations"
String testName = "coreToTest";
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread] = new TestCreateThread(cc, testName);
}
// only one of these should succeed.
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread].start();
}
for (int thread = 0; thread < NUM_THREADS; ++thread) {
threads[thread].join();
}
// We can't guarantee that which thread failed, so go find it
int goodCount = 0;
for (int thread = 0; thread < NUM_THREADS; ++thread) {
goodCount += threads[thread].verifyMe();
}
assertEquals("Only one create should have succeeded", 1, goodCount);
// Check bookkeeping by removing and creating the core again, making sure
// we didn't leave the record of trying to create this core around.
// NOTE: unloading the core closes it too.
cc.unload(testName, true, true, true);
cc.create(testName, ImmutableMap.of("configSet", "minimal"));
// This call should fail with a different error because the core was
// created successfully.
SolrException thrown = expectThrows(SolrException.class, () ->
cc.create(testName, ImmutableMap.of("configSet", "minimal")));
assertTrue("Should have 'already exists' error", thrown.getMessage().contains("already exists"));
cc.unload(testName, true, true, true);
}
cc.shutdown();
}
@Test @Test
public void testNoCores() throws Exception { public void testNoCores() throws Exception {