mirror of https://github.com/apache/lucene.git
SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
This commit is contained in:
parent
e1698bda95
commit
2c49c4a27d
|
@ -167,6 +167,9 @@ Bug Fixes
|
|||
|
||||
* SOLR-14940: ReplicationHandler memory leak through SolrCore.closeHooks with unstable ZK connection. (Anver Sotnikov, Mike Drob)
|
||||
|
||||
* SOLR-14969: Prevent creating multiple cores with the same name which leads to instabilities (race condition)
|
||||
(Erick Erickson, Andreas Hubold)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
@ -1244,6 +1245,7 @@ public class CoreContainer {
|
|||
return create(coreName, cfg.getCoreRootDirectory().resolve(coreName), parameters, false);
|
||||
}
|
||||
|
||||
Set<String> inFlightCreations = new HashSet<>(); // See SOLR-14969
|
||||
/**
|
||||
* Creates a new core in a specified instance directory, publishing the core state to the cluster
|
||||
*
|
||||
|
@ -1253,77 +1255,93 @@ public class CoreContainer {
|
|||
* @return the newly created core
|
||||
*/
|
||||
public SolrCore create(String coreName, Path instancePath, Map<String, String> parameters, boolean newCollection) {
|
||||
|
||||
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
|
||||
|
||||
// TODO: There's a race here, isn't there?
|
||||
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
|
||||
if (getAllCoreNames().contains(coreName)) {
|
||||
log.warn("Creating a core with existing name is not allowed");
|
||||
// TODO: Shouldn't this be a BAD_REQUEST?
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
|
||||
}
|
||||
|
||||
// Validate paths are relative to known locations to avoid path traversal
|
||||
assertPathAllowed(cd.getInstanceDir());
|
||||
assertPathAllowed(Paths.get(cd.getDataDir()));
|
||||
|
||||
boolean preExisitingZkEntry = false;
|
||||
SolrCore core = null;
|
||||
boolean iAdded = false;
|
||||
try {
|
||||
if (getZkController() != null) {
|
||||
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "coreNodeName missing " + parameters.toString());
|
||||
synchronized (inFlightCreations) {
|
||||
if (inFlightCreations.add(coreName)) {
|
||||
iAdded = true;
|
||||
} else {
|
||||
String msg = "Already creating a core with name '" + coreName + "', call aborted '";
|
||||
log.warn(msg);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, msg);
|
||||
}
|
||||
preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
|
||||
}
|
||||
CoreDescriptor cd = new CoreDescriptor(coreName, instancePath, parameters, getContainerProperties(), getZkController());
|
||||
|
||||
// Since the core descriptor is removed when a core is unloaded, it should never be anywhere when a core is created.
|
||||
if (getAllCoreNames().contains(coreName)) {
|
||||
log.warn("Creating a core with existing name is not allowed: '{}'", coreName);
|
||||
// TODO: Shouldn't this be a BAD_REQUEST?
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Core with name '" + coreName + "' already exists.");
|
||||
}
|
||||
|
||||
// Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
|
||||
// first and clean it up if there's an error.
|
||||
coresLocator.create(this, cd);
|
||||
// Validate paths are relative to known locations to avoid path traversal
|
||||
assertPathAllowed(cd.getInstanceDir());
|
||||
assertPathAllowed(Paths.get(cd.getDataDir()));
|
||||
|
||||
SolrCore core = null;
|
||||
boolean preExisitingZkEntry = false;
|
||||
try {
|
||||
solrCores.waitAddPendingCoreOps(cd.getName());
|
||||
core = createFromDescriptor(cd, true, newCollection);
|
||||
coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
|
||||
} finally {
|
||||
solrCores.removeFromPendingOps(cd.getName());
|
||||
}
|
||||
if (getZkController() != null) {
|
||||
if (cd.getCloudDescriptor().getCoreNodeName() == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "coreNodeName missing " + parameters.toString());
|
||||
}
|
||||
preExisitingZkEntry = getZkController().checkIfCoreNodeNameAlreadyExists(cd);
|
||||
}
|
||||
|
||||
// Much of the logic in core handling pre-supposes that the core.properties file already exists, so create it
|
||||
// first and clean it up if there's an error.
|
||||
coresLocator.create(this, cd);
|
||||
|
||||
return core;
|
||||
} catch (Exception ex) {
|
||||
// First clean up any core descriptor, there should never be an existing core.properties file for any core that
|
||||
// failed to be created on-the-fly.
|
||||
coresLocator.delete(this, cd);
|
||||
if (isZooKeeperAware() && !preExisitingZkEntry) {
|
||||
try {
|
||||
getZkController().unregister(coreName, cd);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
SolrException.log(log, null, e);
|
||||
} catch (KeeperException e) {
|
||||
SolrException.log(log, null, e);
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, null, e);
|
||||
solrCores.waitAddPendingCoreOps(cd.getName());
|
||||
core = createFromDescriptor(cd, true, newCollection);
|
||||
coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created
|
||||
} finally {
|
||||
solrCores.removeFromPendingOps(cd.getName());
|
||||
}
|
||||
|
||||
return core;
|
||||
} catch (Exception ex) {
|
||||
// First clean up any core descriptor, there should never be an existing core.properties file for any core that
|
||||
// failed to be created on-the-fly.
|
||||
coresLocator.delete(this, cd);
|
||||
if (isZooKeeperAware() && !preExisitingZkEntry) {
|
||||
try {
|
||||
getZkController().unregister(coreName, cd);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
SolrException.log(log, null, e);
|
||||
} catch (KeeperException e) {
|
||||
SolrException.log(log, null, e);
|
||||
} catch (Exception e) {
|
||||
SolrException.log(log, null, e);
|
||||
}
|
||||
}
|
||||
|
||||
Throwable tc = ex;
|
||||
Throwable c = null;
|
||||
do {
|
||||
tc = tc.getCause();
|
||||
if (tc != null) {
|
||||
c = tc;
|
||||
}
|
||||
} while (tc != null);
|
||||
|
||||
String rootMsg = "";
|
||||
if (c != null) {
|
||||
rootMsg = " Caused by: " + c.getMessage();
|
||||
}
|
||||
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
|
||||
}
|
||||
} finally {
|
||||
synchronized (inFlightCreations) {
|
||||
if (iAdded) {
|
||||
inFlightCreations.remove(coreName);
|
||||
}
|
||||
}
|
||||
|
||||
Throwable tc = ex;
|
||||
Throwable c = null;
|
||||
do {
|
||||
tc = tc.getCause();
|
||||
if (tc != null) {
|
||||
c = tc;
|
||||
}
|
||||
} while (tc != null);
|
||||
|
||||
String rootMsg = "";
|
||||
if (c != null) {
|
||||
rootMsg = " Caused by: " + c.getMessage();
|
||||
}
|
||||
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error CREATEing SolrCore '" + coreName + "': " + ex.getMessage() + rootMsg, ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1777,6 +1795,7 @@ public class CoreContainer {
|
|||
}
|
||||
|
||||
if (cd == null) {
|
||||
log.warn("Cannot unload non-existent core '{}'", name);
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "Cannot unload non-existent core [" + name + "]");
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.File;
|
|||
import java.io.FileOutputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
@ -117,22 +118,27 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
private static class TestReloadThread extends Thread {
|
||||
private final CoreContainer cc;
|
||||
TestReloadThread(CoreContainer cc) {
|
||||
this.cc = cc;
|
||||
}
|
||||
@Override
|
||||
public void run() {
|
||||
cc.reload("core1");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReloadThreaded() throws Exception {
|
||||
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
|
||||
cc.create("core1", ImmutableMap.of("configSet", "minimal"));
|
||||
|
||||
class TestThread extends Thread {
|
||||
@Override
|
||||
public void run() {
|
||||
cc.reload("core1");
|
||||
}
|
||||
}
|
||||
|
||||
List<Thread> threads = new ArrayList<>();
|
||||
int numThreads = 4;
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
threads.add(new TestThread());
|
||||
threads.add(new TestReloadThread(cc));
|
||||
}
|
||||
|
||||
for (Thread thread : threads) {
|
||||
|
@ -147,6 +153,89 @@ public class TestCoreContainer extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
private static class TestCreateThread extends Thread {
|
||||
final CoreContainer cc;
|
||||
final String coreName;
|
||||
boolean foundExpectedError = false;
|
||||
SolrCore core = null;
|
||||
|
||||
TestCreateThread(CoreContainer cc, String coreName) {
|
||||
this.cc = cc;
|
||||
this.coreName = coreName;
|
||||
}
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
core = cc.create(coreName, ImmutableMap.of("configSet", "minimal"));
|
||||
} catch (SolrException e) {
|
||||
String msg = e.getMessage();
|
||||
foundExpectedError = msg.contains("Already creating a core with name") || msg.contains("already exists");
|
||||
}
|
||||
}
|
||||
|
||||
int verifyMe() {
|
||||
if (foundExpectedError) {
|
||||
assertNull("failed create should have returned null for core", core);
|
||||
return 0;
|
||||
} else {
|
||||
assertNotNull("Core should not be null if there was no error", core);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateThreaded() throws Exception {
|
||||
final CoreContainer cc = init(CONFIGSETS_SOLR_XML);
|
||||
final int NUM_THREADS = 3;
|
||||
|
||||
|
||||
// Try this a few times to increase the chances of failure.
|
||||
for (int idx = 0; idx < 3; ++idx) {
|
||||
TestCreateThread[] threads = new TestCreateThread[NUM_THREADS];
|
||||
// Let's add a little stress in here by using the same core name each
|
||||
// time around. The final unload in the loop should delete the core and
|
||||
// allow the next time around to succeed.
|
||||
// This also checks the bookkeeping in CoreContainer.create
|
||||
// that prevents muliple simulatneous creations,
|
||||
// currently "inFlightCreations"
|
||||
String testName = "coreToTest";
|
||||
for (int thread = 0; thread < NUM_THREADS; ++thread) {
|
||||
threads[thread] = new TestCreateThread(cc, testName);
|
||||
}
|
||||
// only one of these should succeed.
|
||||
for (int thread = 0; thread < NUM_THREADS; ++thread) {
|
||||
threads[thread].start();
|
||||
}
|
||||
|
||||
for (int thread = 0; thread < NUM_THREADS; ++thread) {
|
||||
threads[thread].join();
|
||||
}
|
||||
|
||||
// We can't guarantee that which thread failed, so go find it
|
||||
int goodCount = 0;
|
||||
for (int thread = 0; thread < NUM_THREADS; ++thread) {
|
||||
goodCount += threads[thread].verifyMe();
|
||||
}
|
||||
assertEquals("Only one create should have succeeded", 1, goodCount);
|
||||
|
||||
|
||||
// Check bookkeeping by removing and creating the core again, making sure
|
||||
// we didn't leave the record of trying to create this core around.
|
||||
// NOTE: unloading the core closes it too.
|
||||
cc.unload(testName, true, true, true);
|
||||
cc.create(testName, ImmutableMap.of("configSet", "minimal"));
|
||||
// This call should fail with a different error because the core was
|
||||
// created successfully.
|
||||
SolrException thrown = expectThrows(SolrException.class, () ->
|
||||
cc.create(testName, ImmutableMap.of("configSet", "minimal")));
|
||||
assertTrue("Should have 'already exists' error", thrown.getMessage().contains("already exists"));
|
||||
|
||||
cc.unload(testName, true, true, true);
|
||||
}
|
||||
cc.shutdown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoCores() throws Exception {
|
||||
|
||||
|
|
Loading…
Reference in New Issue