SOLR-8973: Zookeeper frenzy when a core is first created.

This commit is contained in:
Scott Blum 2016-04-19 19:39:55 -04:00
parent 99ba1a8f99
commit 71a5870ae7
4 changed files with 89 additions and 21 deletions

View File

@ -190,6 +190,8 @@ Other Changes
* SOLR-9015: Adds SelectStream as a default function in the StreamHandler (Dennis Gove)
* SOLR-8973: Zookeeper frenzy when a core is first created. (Janmejay Singh, Scott Blum, shalin)
================== 6.0.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -1478,11 +1478,13 @@ public final class ZkController {
}
publish(cd, Replica.State.DOWN, false, true);
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(cd.getCloudDescriptor().getCollectionName());
if (collection != null) {
log.info("Registering watch for collection {}", cd.getCloudDescriptor().getCollectionName());
zkStateReader.addCollectionWatch(cd.getCloudDescriptor().getCollectionName());
}
String collectionName = cd.getCloudDescriptor().getCollectionName();
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
log.info(collection == null ?
"Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" :
"Registering watch for collection {}",
collectionName);
zkStateReader.addCollectionWatch(collectionName);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);

View File

@ -26,6 +26,7 @@ import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.OverseerTest;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.ZkTestServer;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Slice;
@ -179,4 +180,63 @@ public class ZkStateReaderTest extends SolrTestCaseJ4 {
server.shutdown();
}
}
public void testWatchedCollectionCreation() throws Exception {
String zkDir = createTempDir("testWatchedCollectionCreation").toFile().getAbsolutePath();
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
try {
server.run();
AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
ZkController.createClusterZkNodes(zkClient);
ZkStateReader reader = new ZkStateReader(zkClient);
reader.createClusterStateWatchersAndUpdate();
reader.addCollectionWatch("c1");
// Initially there should be no c1 collection.
assertNull(reader.getClusterState().getCollectionRef("c1"));
zkClient.makePath(ZkStateReader.COLLECTIONS_ZKNODE + "/c1", true);
reader.forceUpdateCollection("c1");
// Still no c1 collection, despite a collection path.
assertNull(reader.getClusterState().getCollectionRef("c1"));
ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
// create new collection with stateFormat = 2
DocCollection state = new DocCollection("c1", new HashMap<String, Slice>(), new HashMap<String, Object>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE + "/c1/state.json");
ZkWriteCommand wc = new ZkWriteCommand("c1", state);
writer.enqueueUpdate(reader.getClusterState(), wc, null);
writer.writePendingUpdates();
assertTrue(zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/c1/state.json", true));
//reader.forceUpdateCollection("c1");
for (int i = 0; i < 100; ++i) {
Thread.sleep(50);
ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
if (ref != null) {
break;
}
}
ClusterState.CollectionRef ref = reader.getClusterState().getCollectionRef("c1");
assertNotNull(ref);
assertFalse(ref.isLazilyLoaded());
assertEquals(2, ref.get().getStateFormat());
} finally {
IOUtils.close(zkClient);
server.shutdown();
}
}
}

View File

@ -443,13 +443,6 @@ public class ZkStateReader implements Closeable {
// To move a collection's state to format2, first create the new state2 format node, then remove legacy entry.
Map<String, ClusterState.CollectionRef> result = new LinkedHashMap<>(legacyCollectionStates);
// Are there any interesting collections that disappeared from the legacy cluster state?
for (String coll : interestingCollections) {
if (!result.containsKey(coll) && !watchedCollectionStates.containsKey(coll)) {
new StateWatcher(coll).refreshAndWatch(true);
}
}
// Add state format2 collections, but don't override legacy collection states.
for (Map.Entry<String, DocCollection> entry : watchedCollectionStates.entrySet()) {
result.putIfAbsent(entry.getKey(), new ClusterState.CollectionRef(entry.getValue()));
@ -1048,6 +1041,7 @@ public class ZkStateReader implements Closeable {
private DocCollection fetchCollectionState(String coll, Watcher watcher) throws KeeperException, InterruptedException {
String collectionPath = getCollectionPath(coll);
while (true) {
try {
Stat stat = new Stat();
byte[] data = zkClient.getData(collectionPath, watcher, stat, true);
@ -1056,9 +1050,19 @@ public class ZkStateReader implements Closeable {
ClusterState.CollectionRef collectionRef = state.getCollectionStates().get(coll);
return collectionRef == null ? null : collectionRef.get();
} catch (KeeperException.NoNodeException e) {
if (watcher != null) {
// Leave an exists watch in place in case a state.json is created later.
Stat exists = zkClient.exists(collectionPath, watcher, true);
if (exists != null) {
// Rare race condition, we tried to fetch the data and couldn't find it, then we found it exists.
// Loop and try again.
continue;
}
}
return null;
}
}
}
public static String getCollectionPath(String coll) {
return COLLECTIONS_ZKNODE+"/"+coll + "/state.json";