mirror of https://github.com/apache/lucene.git
SOLR-3995: Recovery may never finish on SolrCore shutdown if the last reference to a SolrCore is closed by the recovery process
SOLR-3994: Create more extensive tests around unloading cores. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1402393 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
78b1e07dc9
commit
80b39a3ba7
|
@ -113,6 +113,8 @@ Bug Fixes
|
||||||
* SOLR-3992: QuerySenderListener doesn't populate document cache.
|
* SOLR-3992: QuerySenderListener doesn't populate document cache.
|
||||||
(Shotaro Kamio, yonik)
|
(Shotaro Kamio, yonik)
|
||||||
|
|
||||||
|
* SOLR-3995: Recovery may never finish on SolrCore shutdown if the last reference to
|
||||||
|
a SolrCore is closed by the recovery process. (Mark Miller)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -17,6 +17,7 @@ import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.update.UpdateLog;
|
import org.apache.solr.update.UpdateLog;
|
||||||
import org.apache.zookeeper.CreateMode;
|
import org.apache.zookeeper.CreateMode;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.apache.zookeeper.KeeperException.NoNodeException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -38,7 +39,7 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public abstract class ElectionContext {
|
public abstract class ElectionContext {
|
||||||
|
private static Logger log = LoggerFactory.getLogger(ElectionContext.class);
|
||||||
final String electionPath;
|
final String electionPath;
|
||||||
final ZkNodeProps leaderProps;
|
final ZkNodeProps leaderProps;
|
||||||
final String id;
|
final String id;
|
||||||
|
@ -58,7 +59,12 @@ public abstract class ElectionContext {
|
||||||
public void close() {}
|
public void close() {}
|
||||||
|
|
||||||
public void cancelElection() throws InterruptedException, KeeperException {
|
public void cancelElection() throws InterruptedException, KeeperException {
|
||||||
|
try {
|
||||||
zkClient.delete(leaderSeqPath, -1, true);
|
zkClient.delete(leaderSeqPath, -1, true);
|
||||||
|
} catch (NoNodeException e) {
|
||||||
|
// fine
|
||||||
|
log.warn("cancelElection did not find election node to remove");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException;
|
abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException;
|
||||||
|
|
|
@ -84,6 +84,10 @@ public class LeaderElector {
|
||||||
|
|
||||||
sortSeqs(seqs);
|
sortSeqs(seqs);
|
||||||
List<Integer> intSeqs = getSeqs(seqs);
|
List<Integer> intSeqs = getSeqs(seqs);
|
||||||
|
if (intSeqs.size() == 0) {
|
||||||
|
log.warn("Our node is no longer in line to be leader");
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (seq <= intSeqs.get(0)) {
|
if (seq <= intSeqs.get(0)) {
|
||||||
// first we delete the node advertising the old leader in case the ephem is still there
|
// first we delete the node advertising the old leader in case the ephem is still there
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -313,7 +313,7 @@ public class RecoveryStrategy extends Thread implements ClosableThread {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!successfulRecovery && !isInterrupted()) { // don't use interruption or it will close channels though
|
while (!successfulRecovery && !isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
|
||||||
try {
|
try {
|
||||||
CloudDescriptor cloudDesc = core.getCoreDescriptor()
|
CloudDescriptor cloudDesc = core.getCoreDescriptor()
|
||||||
.getCloudDescriptor();
|
.getCloudDescriptor();
|
||||||
|
|
|
@ -1072,7 +1072,9 @@ public class CoreContainer
|
||||||
|
|
||||||
synchronized(cores) {
|
synchronized(cores) {
|
||||||
SolrCore core = cores.remove( name );
|
SolrCore core = cores.remove( name );
|
||||||
|
if (core != null) {
|
||||||
coreToOrigName.remove(core);
|
coreToOrigName.remove(core);
|
||||||
|
}
|
||||||
return core;
|
return core;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -668,7 +668,12 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (core != null) core.close();
|
if (core != null) {
|
||||||
|
if (coreContainer.getZkController() != null) {
|
||||||
|
core.getSolrCoreState().cancelRecovery();
|
||||||
|
}
|
||||||
|
core.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return coreContainer.isPersistent();
|
return coreContainer.isPersistent();
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover
|
||||||
|
|
||||||
private volatile boolean recoveryRunning;
|
private volatile boolean recoveryRunning;
|
||||||
private RecoveryStrategy recoveryStrat;
|
private RecoveryStrategy recoveryStrat;
|
||||||
private boolean closed = false;
|
private volatile boolean closed = false;
|
||||||
|
|
||||||
private RefCounted<IndexWriter> refCntWriter;
|
private RefCounted<IndexWriter> refCntWriter;
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,6 @@ import org.apache.solr.common.cloud.ZkNodeProps;
|
||||||
import org.apache.solr.common.cloud.ZkStateReader;
|
import org.apache.solr.common.cloud.ZkStateReader;
|
||||||
import org.apache.solr.common.params.CollectionParams.CollectionAction;
|
import org.apache.solr.common.params.CollectionParams.CollectionAction;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.CoreAdminParams;
|
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
import org.apache.solr.common.params.UpdateParams;
|
import org.apache.solr.common.params.UpdateParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
@ -322,19 +321,19 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
|
|
||||||
// would be better if these were all separate tests - but much, much
|
// would be better if these were all separate tests - but much, much
|
||||||
// slower
|
// slower
|
||||||
// doOptimisticLockingAndUpdating();
|
doOptimisticLockingAndUpdating();
|
||||||
// testMultipleCollections();
|
testMultipleCollections();
|
||||||
// testANewCollectionInOneInstance();
|
testANewCollectionInOneInstance();
|
||||||
// testSearchByCollectionName();
|
testSearchByCollectionName();
|
||||||
// testANewCollectionInOneInstanceWithManualShardAssignement();
|
testANewCollectionInOneInstanceWithManualShardAssignement();
|
||||||
// testNumberOfCommitsWithCommitAfterAdd();
|
testNumberOfCommitsWithCommitAfterAdd();
|
||||||
//
|
|
||||||
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
|
|
||||||
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
|
|
||||||
//
|
|
||||||
// testCollectionsAPI();
|
|
||||||
testCoreUnloadAndLeaders();
|
|
||||||
|
|
||||||
|
testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
|
||||||
|
testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
|
||||||
|
|
||||||
|
testCollectionsAPI();
|
||||||
|
testCoreUnloadAndLeaders();
|
||||||
|
testUnloadLotsOfCores();
|
||||||
// Thread.sleep(10000000000L);
|
// Thread.sleep(10000000000L);
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
super.printLayout();
|
super.printLayout();
|
||||||
|
@ -412,8 +411,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
createCmd.setDataDir(core3dataDir);
|
createCmd.setDataDir(core3dataDir);
|
||||||
server.request(createCmd);
|
server.request(createCmd);
|
||||||
|
|
||||||
Thread.sleep(1000);
|
|
||||||
|
|
||||||
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
||||||
|
|
||||||
// so that we start with some versions when we reload...
|
// so that we start with some versions when we reload...
|
||||||
|
@ -474,8 +471,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
createCmd.setDataDir(core4dataDir);
|
createCmd.setDataDir(core4dataDir);
|
||||||
server.request(createCmd);
|
server.request(createCmd);
|
||||||
|
|
||||||
Thread.sleep(1000);
|
|
||||||
|
|
||||||
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
||||||
|
|
||||||
// unload the leader again
|
// unload the leader again
|
||||||
|
@ -510,8 +505,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
createCmd.setDataDir(core1DataDir);
|
createCmd.setDataDir(core1DataDir);
|
||||||
server.request(createCmd);
|
server.request(createCmd);
|
||||||
|
|
||||||
Thread.sleep(1000);
|
|
||||||
|
|
||||||
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);
|
||||||
|
|
||||||
|
|
||||||
|
@ -539,6 +532,64 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void testUnloadLotsOfCores() throws Exception {
|
||||||
|
SolrServer client = clients.get(2);
|
||||||
|
String url3 = getBaseUrl(client);
|
||||||
|
final HttpSolrServer server = new HttpSolrServer(url3);
|
||||||
|
|
||||||
|
ThreadPoolExecutor executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE,
|
||||||
|
5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
|
||||||
|
new DefaultSolrThreadFactory("testExecutor"));
|
||||||
|
int cnt = atLeast(6);
|
||||||
|
for (int i = 0; i < cnt; i++) {
|
||||||
|
final int freezeI = i;
|
||||||
|
executor.execute(new Runnable() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
Create createCmd = new Create();
|
||||||
|
createCmd.setCoreName("multiunload" + freezeI);
|
||||||
|
createCmd.setCollection("multiunload");
|
||||||
|
String core3dataDir = dataDir.getAbsolutePath() + File.separator
|
||||||
|
+ System.currentTimeMillis() + "unloadcollection" + "_3n" + freezeI;
|
||||||
|
createCmd.setDataDir(core3dataDir);
|
||||||
|
try {
|
||||||
|
server.request(createCmd);
|
||||||
|
} catch (SolrServerException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
}
|
||||||
|
executor.shutdown();
|
||||||
|
executor.awaitTermination(120, TimeUnit.SECONDS);
|
||||||
|
executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5,
|
||||||
|
TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
|
||||||
|
new DefaultSolrThreadFactory("testExecutor"));
|
||||||
|
for (int j = 0; j < cnt; j++) {
|
||||||
|
final int freezeJ = j;
|
||||||
|
executor.execute(new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
Unload unloadCmd = new Unload(true);
|
||||||
|
unloadCmd.setCoreName("multiunload" + freezeJ);
|
||||||
|
try {
|
||||||
|
server.request(unloadCmd);
|
||||||
|
} catch (SolrServerException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Thread.sleep(random().nextInt(50));
|
||||||
|
}
|
||||||
|
executor.shutdown();
|
||||||
|
executor.awaitTermination(120, TimeUnit.SECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
private String getBaseUrl(SolrServer client) {
|
private String getBaseUrl(SolrServer client) {
|
||||||
String url2 = ((HttpSolrServer) client).getBaseURL()
|
String url2 = ((HttpSolrServer) client).getBaseURL()
|
||||||
|
@ -794,7 +845,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
|
||||||
}
|
}
|
||||||
Thread.sleep(50);
|
Thread.sleep(50);
|
||||||
}
|
}
|
||||||
printLayout();
|
|
||||||
fail("Could not find the new collection - " + exp.code() + " : " + collectionClient.getBaseURL());
|
fail("Could not find the new collection - " + exp.code() + " : " + collectionClient.getBaseURL());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -211,7 +211,6 @@ public class CloudSolrServer extends SolrServer {
|
||||||
if ((sendToLeaders && leaderUrlList == null) || (!sendToLeaders
|
if ((sendToLeaders && leaderUrlList == null) || (!sendToLeaders
|
||||||
&& urlList == null)
|
&& urlList == null)
|
||||||
|| clusterState.hashCode() != this.lastClusterStateHashCode) {
|
|| clusterState.hashCode() != this.lastClusterStateHashCode) {
|
||||||
System.out.println("build a new map for " + collection);
|
|
||||||
// build a map of unique nodes
|
// build a map of unique nodes
|
||||||
// TODO: allow filtering by group, role, etc
|
// TODO: allow filtering by group, role, etc
|
||||||
Map<String,ZkNodeProps> nodes = new HashMap<String,ZkNodeProps>();
|
Map<String,ZkNodeProps> nodes = new HashMap<String,ZkNodeProps>();
|
||||||
|
|
|
@ -117,6 +117,7 @@ public class ClusterState implements JSONWriter.Writable {
|
||||||
*/
|
*/
|
||||||
public Replica getShardProps(final String collection, final String coreNodeName) {
|
public Replica getShardProps(final String collection, final String coreNodeName) {
|
||||||
Map<String, Slice> slices = getSlices(collection);
|
Map<String, Slice> slices = getSlices(collection);
|
||||||
|
if (slices == null) return null;
|
||||||
for(Slice slice: slices.values()) {
|
for(Slice slice: slices.values()) {
|
||||||
if(slice.getReplicasMap().get(coreNodeName)!=null) {
|
if(slice.getReplicasMap().get(coreNodeName)!=null) {
|
||||||
return slice.getReplicasMap().get(coreNodeName);
|
return slice.getReplicasMap().get(coreNodeName);
|
||||||
|
|
Loading…
Reference in New Issue