From 6f2d99e574888227a29ea173e52f6ff6a19e23db Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Mon, 26 Feb 2018 11:22:38 +0530 Subject: [PATCH] SOLR-10720: Aggressive removal of a collection breaks cluster status API --- solr/CHANGES.txt | 2 ++ .../OverseerCollectionMessageHandler.java | 34 ------------------- .../solr/handler/admin/ClusterStatus.java | 20 +++++++---- 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c6a2dd00285..c915aa48e36 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -223,6 +223,8 @@ Bug Fixes * SOLR-12021: Fixed a bug in ApiSpec and other JSON resource loading that was causing unclosed file handles (hossman) +* SOLR-10720: Aggressive removal of a collection breaks cluster status API. (Alexey Serba, shalin) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java index 2143d1e7155..b7cf013e603 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/OverseerCollectionMessageHandler.java @@ -23,7 +23,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; @@ -372,39 +371,6 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler, inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap))); } - /** - * Walks the tree of collection status to verify that any replicas not reporting a "down" status is - * on a live node, if any replicas reporting their status as "active" but the node is not live is - * marked as "down"; used by CLUSTERSTATUS. - * @param liveNodes List of currently live node names. - * @param collectionProps Map of collection status information pulled directly from ZooKeeper. - */ - - @SuppressWarnings("unchecked") - protected void crossCheckReplicaStateWithLiveNodes(List liveNodes, NamedList collectionProps) { - Iterator> colls = collectionProps.iterator(); - while (colls.hasNext()) { - Map.Entry next = colls.next(); - Map collMap = (Map)next.getValue(); - Map shards = (Map)collMap.get("shards"); - for (Object nextShard : shards.values()) { - Map shardMap = (Map)nextShard; - Map replicas = (Map)shardMap.get("replicas"); - for (Object nextReplica : replicas.values()) { - Map replicaMap = (Map)nextReplica; - if (Replica.State.getState((String) replicaMap.get(ZkStateReader.STATE_PROP)) != Replica.State.DOWN) { - // not down, so verify the node is live - String node_name = (String)replicaMap.get(ZkStateReader.NODE_NAME_PROP); - if (!liveNodes.contains(node_name)) { - // node is not live, so this replica is actually down - replicaMap.put(ZkStateReader.STATE_PROP, Replica.State.DOWN.toString()); - } - } - } - } - } - } - /** * Get collection status from cluster state. * Can return collection status by given shard name. diff --git a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java index 0f159544f0f..9ebac776cdc 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/ClusterStatus.java @@ -17,6 +17,7 @@ package org.apache.solr.handler.admin; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -53,7 +54,7 @@ public class ClusterStatus { } @SuppressWarnings("unchecked") - public void getClusterStatus(NamedList results) + public void getClusterStatus(NamedList results) throws KeeperException, InterruptedException { // read aliases Aliases aliases = zkStateReader.getAliases(); @@ -116,9 +117,7 @@ public class ClusterStatus { } if (shard != null) { String[] paramShards = shard.split(","); - for(String paramShard : paramShards){ - requestedShards.add(paramShard); - } + requestedShards.addAll(Arrays.asList(paramShards)); } if (clusterStateCollection.getStateFormat() > 1) { @@ -133,9 +132,16 @@ public class ClusterStatus { if (collectionVsAliases.containsKey(name) && !collectionVsAliases.get(name).isEmpty()) { collectionStatus.put("aliases", collectionVsAliases.get(name)); } - String configName = zkStateReader.readConfigName(name); - collectionStatus.put("configName", configName); - collectionProps.add(name, collectionStatus); + try { + String configName = zkStateReader.readConfigName(name); + collectionStatus.put("configName", configName); + collectionProps.add(name, collectionStatus); + } catch (SolrException e) { + if (e.getCause() instanceof KeeperException.NoNodeException) { + // skip this collection because the collection's znode has been deleted + // which can happen during aggressive collection removal, see SOLR-10720 + } else throw e; + } } List liveNodes = zkStateReader.getZkClient().getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true);