SOLR-8973: Zookeeper frenzy when a core is first created.

Scott Blum 2016-04-19 19:39:55 -04:00
parent 99ba1a8f99
commit 71a5870ae7
4 changed files with 89 additions and 21 deletions

CHANGES.txt

@@ -190,6 +190,8 @@ Other Changes
 
 * SOLR-9015: Adds SelectStream as a default function in the StreamHandler (Dennis Gove)
 
+* SOLR-8973: Zookeeper frenzy when a core is first created. (Janmejay Singh, Scott Blum, shalin)
+
 ================== 6.0.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

ZkController.java

@@ -1478,11 +1478,13 @@ public final class ZkController {
       }
 
       publish(cd, Replica.State.DOWN, false, true);
-      DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(cd.getCloudDescriptor().getCollectionName());
-      if (collection != null) {
-        log.info("Registering watch for collection {}", cd.getCloudDescriptor().getCollectionName());
-        zkStateReader.addCollectionWatch(cd.getCloudDescriptor().getCollectionName());
-      }
+      String collectionName = cd.getCloudDescriptor().getCollectionName();
+      DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
+      log.info(collection == null ?
+              "Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
+              "Registering watch for collection {}",
+          collectionName);
+      zkStateReader.addCollectionWatch(collectionName);
     } catch (KeeperException e) {
       log.error("", e);
       throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);

ZkStateReaderTest.java

@@ -26,6 +26,7 @@ import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.OverseerTest;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkTestServer;
+import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.DocRouter;
 import org.apache.solr.common.cloud.Slice;
@@ -179,4 +180,63 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
       server.shutdown();
     }
   }
+
+  public void testWatchedCollectionCreation() throws Exception {
+    String zkDir = createTempDir("testWatchedCollectionCreation").toFile().getAbsolutePath();
+
+    ZkTestServer server = new ZkTestServer(zkDir);
+
+    SolrZkClient zkClient = null;
+
+    try {
+      server.run();
+      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+
+      zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
+      ZkController.createClusterZkNodes(zkClient);
+
+      ZkStateReader reader = new ZkStateReader(zkClient);
+      reader.createClusterStateWatchersAndUpdate();
+      reader.addCollectionWatch("c1");
+
+      // Initially there should be no c1 collection.
+      assertNull(reader.getClusterState().getCollectionRef("c1"));
+
+      zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true);
+      reader.forceUpdateCollection("c1");
+
+      // Still no c1 collection, despite a collection path.
+      assertNull(reader.getClusterState().getCollectionRef("c1"));
+
+      ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
+
+      // create new collection with stateFormat = 2
+      DocCollection state = new DocCollection("c1", new HashMap<String, Slice>(), new HashMap<String, Object>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE + "/c1/state.json");
+      ZkWriteCommand wc = new ZkWriteCommand("c1", state);
+      writer.enqueueUpdate(reader.getClusterState(), wc, null);
+      writer.writePendingUpdates();
+
+      assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true));
+
+      //reader.forceUpdateCollection("c1");
+
+      for (int i = 0; i < 100; ++i) {
+        Thread.sleep(50);
+        ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
+        if (ref != null) {
+          break;
+        }
+      }
+
+      ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
+      assertNotNull(ref);
+      assertFalse(ref.isLazilyLoaded());
+      assertEquals(2, ref.get().getStateFormat());
+    } finally {
+      IOUtils.close(zkClient);
+      server.shutdown();
+    }
+  }
 }

ZkStateReader.java

@@ -443,13 +443,6 @@ public class ZkStateReader implements Closeable {
       // To move a collection's state to format2, first create the new state2 format node, then remove legacy entry.
       Map<String, ClusterState.CollectionRef> result = new LinkedHashMap<>(legacyCollectionStates);
 
-      // Are there any interesting collections that disappeared from the legacy cluster state?
-      for (String coll : interestingCollections) {
-        if (!result.containsKey(coll) && !watchedCollectionStates.containsKey(coll)) {
-          new StateWatcher(coll).refreshAndWatch(true);
-        }
-      }
-
       // Add state format2 collections, but don't override legacy collection states.
       for (Map.Entry<String, DocCollection> entry : watchedCollectionStates.entrySet()) {
         result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue()));
@@ -1048,15 +1041,26 @@
   private DocCollection fetchCollectionState(String coll, Watcher watcher) throws KeeperException, InterruptedException {
     String collectionPath = getCollectionPath(coll);
-    try {
-      Stat stat = new Stat();
-      byte[] data = zkClient.getData(collectionPath, watcher, stat, true);
-      ClusterState state = ClusterState.load(stat.getVersion(), data,
-          Collections.<String>emptySet(), collectionPath);
-      ClusterState.CollectionRef collectionRef = state.getCollectionStates().get(coll);
-      return collectionRef == null ? null : collectionRef.get();
-    } catch (KeeperException.NoNodeException e) {
-      return null;
+    while (true) {
+      try {
+        Stat stat = new Stat();
+        byte[] data = zkClient.getData(collectionPath, watcher, stat, true);
+        ClusterState state = ClusterState.load(stat.getVersion(), data,
+            Collections.<String>emptySet(), collectionPath);
+        ClusterState.CollectionRef collectionRef = state.getCollectionStates().get(coll);
+        return collectionRef == null ? null : collectionRef.get();
+      } catch (KeeperException.NoNodeException e) {
+        if (watcher != null) {
+          // Leave an exists watch in place in case a state.json is created later.
+          Stat exists = zkClient.exists(collectionPath, watcher, true);
+          if (exists != null) {
+            // Rare race condition, we tried to fetch the data and couldn't find it, then we found it exists.
+            // Loop and try again.
+            continue;
+          }
+        }
+        return null;
+      }
     }
   }