SOLR-4421,SOLR-4165: On CoreContainer shutdown, all SolrCores should publish their state as DOWN.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1446914 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2013-02-16 16:59:53 +00:00
parent 3a4103d19b
commit 384d42b5e3
4 changed files with 90 additions and 89 deletions

View File

@ -129,6 +129,9 @@ Bug Fixes
kick in when using NRTCachingDirectory or the rate limiting feature. kick in when using NRTCachingDirectory or the rate limiting feature.
(Mark Miller) (Mark Miller)
* SOLR-4421,SOLR-4165: On CoreContainer shutdown, all SolrCores should publish their
state as DOWN. (Mark Miller, Markus Jelsma)
Optimizations Optimizations
---------------------- ----------------------
@ -161,9 +164,6 @@ Other Changes
* SOLR-4384: Make post.jar report timing information (Upayavira via janhoy) * SOLR-4384: Make post.jar report timing information (Upayavira via janhoy)
* SOLR-4421: On CoreContainer shutdown, all SolrCores should publish their
state as DOWN. (Mark Miller)
================== 4.1.0 ================== ================== 4.1.0 ==================
Versions of Major Components Versions of Major Components

View File

@ -480,9 +480,14 @@ public final class ZkController {
} }
private void init(CurrentCoreDescriptorProvider registerOnReconnect) { private void init(CurrentCoreDescriptorProvider registerOnReconnect) {
boolean alreadyCreatedZkReader = false;
try { try {
alreadyCreatedZkReader = publishAndWaitForDownStates(alreadyCreatedZkReader); boolean createdWatchesAndUpdated = false;
if (zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, true)) {
zkStateReader.createClusterStateWatchersAndUpdate();
createdWatchesAndUpdated = true;
publishAndWaitForDownStates();
}
// makes nodes zkNode // makes nodes zkNode
cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient); cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient);
@ -501,7 +506,7 @@ public final class ZkController {
overseerElector.setup(context); overseerElector.setup(context);
overseerElector.joinElection(context, false); overseerElector.joinElection(context, false);
if (!alreadyCreatedZkReader) { if (!createdWatchesAndUpdated) {
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
} }
@ -523,93 +528,92 @@ public final class ZkController {
} }
private boolean publishAndWaitForDownStates(boolean alreadyCreatedZkReader) public void publishAndWaitForDownStates() throws KeeperException,
throws KeeperException, InterruptedException { InterruptedException {
if (zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, true)) {
alreadyCreatedZkReader = true; ClusterState clusterState = zkStateReader.getClusterState();
// try and publish anyone from our node as down Set<String> collections = clusterState.getCollections();
zkStateReader.createClusterStateWatchersAndUpdate(); List<String> updatedNodes = new ArrayList<String>();
ClusterState clusterState = zkStateReader.getClusterState(); for (String collectionName : collections) {
Set<String> collections = clusterState.getCollections(); DocCollection collection = clusterState.getCollection(collectionName);
List<String> updatedNodes = new ArrayList<String>(); Collection<Slice> slices = collection.getSlices();
for (Slice slice : slices) {
Collection<Replica> replicas = slice.getReplicas();
for (Replica replica : replicas) {
if (replica.getNodeName().equals(getNodeName())
&& !(replica.getStr(ZkStateReader.STATE_PROP)
.equals(ZkStateReader.DOWN))) {
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state",
ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
ZkStateReader.BASE_URL_PROP, getBaseUrl(),
ZkStateReader.CORE_NAME_PROP,
replica.getStr(ZkStateReader.CORE_NAME_PROP),
ZkStateReader.ROLES_PROP,
replica.getStr(ZkStateReader.ROLES_PROP),
ZkStateReader.NODE_NAME_PROP, getNodeName(),
ZkStateReader.SHARD_ID_PROP,
replica.getStr(ZkStateReader.SHARD_ID_PROP),
ZkStateReader.COLLECTION_PROP,
replica.getStr(ZkStateReader.COLLECTION_PROP));
updatedNodes.add(replica.getStr(ZkStateReader.CORE_NAME_PROP));
overseerJobQueue.offer(ZkStateReader.toJSON(m));
}
}
}
}
// now wait till the updates are in our state
long now = System.currentTimeMillis();
long timeout = now + 1000 * 300;
boolean foundStates = false;
while (System.currentTimeMillis() < timeout) {
clusterState = zkStateReader.getClusterState();
collections = clusterState.getCollections();
for (String collectionName : collections) { for (String collectionName : collections) {
DocCollection collection = clusterState.getCollection(collectionName); DocCollection collection = clusterState.getCollection(collectionName);
Collection<Slice> slices = collection.getSlices(); Collection<Slice> slices = collection.getSlices();
for (Slice slice : slices) { for (Slice slice : slices) {
Collection<Replica> replicas = slice.getReplicas(); Collection<Replica> replicas = slice.getReplicas();
for (Replica replica : replicas) { for (Replica replica : replicas) {
if (replica.getNodeName().equals(getNodeName()) if (replica.getStr(ZkStateReader.STATE_PROP).equals(
&& !(replica.getStr(ZkStateReader.STATE_PROP) ZkStateReader.DOWN)) {
.equals(ZkStateReader.DOWN))) { updatedNodes.remove(replica.getStr(ZkStateReader.CORE_NAME_PROP));
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
"state", ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
ZkStateReader.BASE_URL_PROP, getBaseUrl(),
ZkStateReader.CORE_NAME_PROP, replica.getStr(ZkStateReader.CORE_NAME_PROP),
ZkStateReader.ROLES_PROP,
replica.getStr(ZkStateReader.ROLES_PROP),
ZkStateReader.NODE_NAME_PROP, getNodeName(),
ZkStateReader.SHARD_ID_PROP,
replica.getStr(ZkStateReader.SHARD_ID_PROP),
ZkStateReader.COLLECTION_PROP,
replica.getStr(ZkStateReader.COLLECTION_PROP));
updatedNodes.add(replica.getStr(ZkStateReader.CORE_NAME_PROP));
overseerJobQueue.offer(ZkStateReader.toJSON(m));
} }
} }
} }
} }
// now wait till the updates are in our state if (updatedNodes.size() == 0) {
long now = System.currentTimeMillis(); foundStates = true;
long timeout = now + 1000 * 300; break;
boolean foundStates = false;
while (System.currentTimeMillis() < timeout) {
clusterState = zkStateReader.getClusterState();
collections = clusterState.getCollections();
for (String collectionName : collections) {
DocCollection collection = clusterState
.getCollection(collectionName);
Collection<Slice> slices = collection.getSlices();
for (Slice slice : slices) {
Collection<Replica> replicas = slice.getReplicas();
for (Replica replica : replicas) {
if (replica.getStr(ZkStateReader.STATE_PROP).equals(
ZkStateReader.DOWN)) {
updatedNodes.remove(replica
.getStr(ZkStateReader.CORE_NAME_PROP));
}
}
}
}
if (updatedNodes.size() == 0) {
foundStates = true;
break;
}
}
if (!foundStates) {
log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state.");
} }
} }
return alreadyCreatedZkReader; if (!foundStates) {
log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state.");
}
} }
/** /**
* Validates if the chroot exists in zk (or if it is successfully created). Optionally, if create is set to true this method will create the path * Validates if the chroot exists in zk (or if it is successfully created).
* in case it doesn't exist * Optionally, if create is set to true this method will create the path in
* @return true if the path exists or is created * case it doesn't exist
* false if the path doesn't exist and 'create' = false *
* @return true if the path exists or is created false if the path doesn't
* exist and 'create' = false
*/ */
public static boolean checkChrootPath(String zkHost, boolean create) throws KeeperException, InterruptedException { public static boolean checkChrootPath(String zkHost, boolean create)
if(!containsChroot(zkHost)) { throws KeeperException, InterruptedException {
if (!containsChroot(zkHost)) {
return true; return true;
} }
log.info("zkHost includes chroot"); log.info("zkHost includes chroot");
String chrootPath = zkHost.substring(zkHost.indexOf("/"), zkHost.length()); String chrootPath = zkHost.substring(zkHost.indexOf("/"), zkHost.length());
SolrZkClient tmpClient = new SolrZkClient(zkHost.substring(0, zkHost.indexOf("/")), 60*1000); SolrZkClient tmpClient = new SolrZkClient(zkHost.substring(0,
zkHost.indexOf("/")), 60 * 1000);
boolean exists = tmpClient.exists(chrootPath, true); boolean exists = tmpClient.exists(chrootPath, true);
if(!exists && create) { if (!exists && create) {
tmpClient.makePath(chrootPath, false, true); tmpClient.makePath(chrootPath, false, true);
exists = true; exists = true;
} }
@ -617,7 +621,6 @@ public final class ZkController {
return exists; return exists;
} }
/** /**
* Validates if zkHost contains a chroot. See http://zookeeper.apache.org/doc/r3.2.2/zookeeperProgrammers.html#ch_zkSessions * Validates if zkHost contains a chroot. See http://zookeeper.apache.org/doc/r3.2.2/zookeeperProgrammers.html#ch_zkSessions
*/ */

View File

@ -740,12 +740,24 @@ public class CoreContainer
public void shutdown() { public void shutdown() {
log.info("Shutting down CoreContainer instance=" log.info("Shutting down CoreContainer instance="
+ System.identityHashCode(this)); + System.identityHashCode(this));
if (isZooKeeperAware()) {
try {
zkController.publishAndWaitForDownStates();
} catch (KeeperException e) {
log.error("", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.warn("", e);
}
}
isShutDown = true; isShutDown = true;
if (isZooKeeperAware()) { if (isZooKeeperAware()) {
publishCoresAsDown();
cancelCoreRecoveries(); cancelCoreRecoveries();
} }
try { try {
synchronized (cores) { synchronized (cores) {
@ -784,20 +796,6 @@ public class CoreContainer
} }
} }
private void publishCoresAsDown() {
synchronized (cores) {
for (SolrCore core : cores.values()) {
try {
zkController.publish(core.getCoreDescriptor(), ZkStateReader.DOWN);
} catch (KeeperException e) {
log.error("", e);
} catch (InterruptedException e) {
log.error("", e);
}
}
}
}
public void cancelCoreRecoveries() { public void cancelCoreRecoveries() {
ArrayList<SolrCoreState> coreStates = new ArrayList<SolrCoreState>(); ArrayList<SolrCoreState> coreStates = new ArrayList<SolrCoreState>();
synchronized (cores) { synchronized (cores) {

View File

@ -207,7 +207,6 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes
if (DEBUG) { if (DEBUG) {
printLayout(); printLayout();
} }
zkServer.shutdown();
System.clearProperty("zkHost"); System.clearProperty("zkHost");
System.clearProperty("collection"); System.clearProperty("collection");
System.clearProperty("enable.update.log"); System.clearProperty("enable.update.log");
@ -217,6 +216,7 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes
System.clearProperty("solr.test.sys.prop2"); System.clearProperty("solr.test.sys.prop2");
resetExceptionIgnores(); resetExceptionIgnores();
super.tearDown(); super.tearDown();
zkServer.shutdown();
} }
protected void printLayout() throws Exception { protected void printLayout() throws Exception {