diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 9df6a2d7e83..dd01368b863 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.ClusterStateProvider; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.cloud.api.collections.CreateCollectionCmd; import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.autoscaling.OverseerTriggerThread; import org.apache.solr.cloud.overseer.ClusterStateMutator; @@ -79,8 +80,61 @@ import org.slf4j.LoggerFactory; import com.codahale.metrics.Timer; /** - * Cluster leader. Responsible for processing state updates, node assignments, creating/deleting - * collections, shards, replicas and setting various properties. + *

Cluster leader. Responsible for processing state updates, node assignments, creating/deleting + * collections, shards, replicas and setting various properties.

+ * + *

The Overseer is a single elected node in the SolrCloud cluster that is in charge of interactions with + * ZooKeeper that require global synchronization. It also hosts the Collection API implementation and the + * Autoscaling framework.

+ * + *

The Overseer deals with:

+ * + * + *

The nodes in the cluster communicate with the Overseer over queues implemented in ZooKeeper. There are essentially + * two queues:

+ *
    + *
  1. The state update queue, through which nodes request the Overseer to update the state.json file of a + * Collection in ZooKeeper. This queue is in Zookeeper at /overseer/queue,
  2. + *
  3. A queue shared between Collection API and Config Set API requests. This queue is in Zookeeper at + * /overseer/collection-queue-work.
  4. + *
+ * + *

An example of the steps involved in the Overseer processing a Collection creation API call:

+ *
    + *
  1. Client uses the Collection API with CREATE action and reaches a node of the cluster,
  2. + *
  3. The node (via {@link CollectionsHandler}) enqueues the request into the /overseer/collection-queue-work + * queue in ZooKeepeer,
  4. + *
  5. The {@link OverseerCollectionConfigSetProcessor} running on the Overseer node dequeues the message and using an + * executor service with a maximum pool size of {@link OverseerTaskProcessor#MAX_PARALLEL_TASKS} hands it for processing + * to {@link OverseerCollectionMessageHandler},
  6. + *
  7. Command {@link CreateCollectionCmd} then executes and does: + *
      + *
    1. Update some state directly in ZooKeeper (creating collection znode),
    2. + *
    3. Compute replica placement on available nodes in the cluster,
    4. + *
    5. Enqueue a state change request for creating the state.json file for the collection in ZooKeeper. + * This is done by enqueuing a message in /overseer/queue,
    6. + *
    7. The command then waits for the update to be seen in ZooKeeper...
    8. + *
  8. + *
  9. The {@link ClusterStateUpdater} (also running on the Overseer node) dequeues the state change message and creates the + * state.json file in ZooKeeper for the Collection. All the work of the cluster state updater + * (creations, updates, deletes) is done sequentially for the whole cluster by a single thread.
  10. + *
  11. The {@link CreateCollectionCmd} sees the state change in + * ZooKeeper and: + *
      + *
    1. Builds and sends requests to each node to create the appropriate cores for all the replicas of all shards + * of the collection. Nodes create the replicas and set them to {@link org.apache.solr.common.cloud.Replica.State#ACTIVE}.
    2. + *
  12. + *
  13. The collection creation command has succeeded from the Overseer perspective,
  14. + *
  15. {@link CollectionsHandler} checks the replicas in Zookeeper and verifies they are all + * {@link org.apache.solr.common.cloud.Replica.State#ACTIVE},
  16. + *
  17. The client receives a success return.
  18. + *
*/ public class Overseer implements SolrCloseable { public static final String QUEUE_OPERATION = "operation"; @@ -97,6 +151,12 @@ public class Overseer implements SolrCloseable { enum LeaderStatus {DONT_KNOW, NO, YES} + /** + *

This class is responsible for dequeueing state change requests from the ZooKeeper queue at /overseer/queue + * and executing the requested cluster change (essentially writing or updating state.json for a collection).

+ * + *

The cluster state updater is a single thread dequeueing and executing requests.

+ */ private class ClusterStateUpdater implements Runnable, Closeable { private final ZkStateReader reader; diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java index 465888ff765..53d799b9f57 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java @@ -51,9 +51,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * A ZK-based distributed queue. Optimized for single-consumer, + *

A ZK-based distributed queue. Optimized for single-consumer, * multiple-producer: if there are multiple consumers on the same ZK queue, - * the results should be correct but inefficient + * the results should be correct but inefficient.

+ * + *

This implementation (with help from subclass {@link OverseerTaskQueue}) is used for the + * /overseer/collection-queue-work queue used for Collection and Config Set API calls to the Overseer.

+ * + *

Implementation note: In order to enqueue a message into this queue, a {@link CreateMode#EPHEMERAL_SEQUENTIAL} response node is created + * and watched at /overseer/collection-queue-work/qnr-monotonically_increasng_id, then a corresponding + * {@link CreateMode#PERSISTENT} request node reusing the same id is created at /overseer/collection-queue-work/qn-response_id.

*/ public class ZkDistributedQueue implements DistributedQueue { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java index 89f1600722b..3e8ee37ab4f 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java @@ -70,6 +70,15 @@ public interface CollectionParams { } } + /** + *

(Mostly) Collection API actions that can be sent by nodes to the Overseer over the /overseer/collection-queue-work + * ZooKeeper queue.

+ * + *

Some of these actions are also used over the cluster state update queue at /overseer/queue and have a + * different (though related) meaning there. These actions are: + * {@link #CREATE}, {@link #DELETE}, {@link #CREATESHARD}, {@link #DELETESHARD}, {@link #ADDREPLICA}, {@link #ADDREPLICAPROP}, + * {@link #DELETEREPLICAPROP}, {@link #BALANCESHARDUNIQUE}, {@link #MODIFYCOLLECTION} and {@link #MIGRATESTATEFORMAT}.

+ */ enum CollectionAction { CREATE(true, LockLevel.COLLECTION), DELETE(true, LockLevel.COLLECTION),