diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 3f013a5af1b..0cc5e8eed00 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -186,6 +186,9 @@ Bug Fixes * SOLR-6507: Fixed several bugs involving stats.field used with local params (hossman) +* SOLR-6481: CLUSTERSTATUS should check if the node hosting a replica is live when + reporting replica status (Timothy Potter) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java index 9adc75e555f..e166366b190 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java @@ -40,6 +40,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; @@ -795,6 +796,10 @@ public class OverseerCollectionProcessor implements Runnable, Closeable { } } + List liveNodes = zkStateReader.getZkClient().getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true); + + // now we need to walk the collectionProps tree to cross-check replica state with live nodes + crossCheckReplicaStateWithLiveNodes(liveNodes, collectionProps); NamedList clusterStatus = new SimpleOrderedMap<>(); clusterStatus.add("collections", collectionProps); @@ -816,12 +821,42 @@ public class OverseerCollectionProcessor implements Runnable, Closeable { } // add live_nodes - List liveNodes = zkStateReader.getZkClient().getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true); clusterStatus.add("live_nodes", liveNodes); results.add("cluster", clusterStatus); } + /** + * Walks the tree of collection status to verify that any replicas not reporting a "down" status is + * on a live node, if any replicas reporting their status as "active" but the node is not live is + * marked as "down"; used by CLUSTERSTATUS. + * @param liveNodes List of currently live node names. + * @param collectionProps Map of collection status information pulled directly from ZooKeeper. + */ + protected void crossCheckReplicaStateWithLiveNodes(List liveNodes, NamedList collectionProps) { + Iterator> colls = collectionProps.iterator(); + while (colls.hasNext()) { + Map.Entry next = colls.next(); + Map collMap = (Map)next.getValue(); + Map shards = (Map)collMap.get("shards"); + for (Object nextShard : shards.values()) { + Map shardMap = (Map)nextShard; + Map replicas = (Map)shardMap.get("replicas"); + for (Object nextReplica : replicas.values()) { + Map replicaMap = (Map)nextReplica; + if (!ZkStateReader.DOWN.equals(replicaMap.get(ZkStateReader.STATE_PROP))) { + // not down, so verify the node is live + String node_name = (String)replicaMap.get(ZkStateReader.NODE_NAME_PROP); + if (!liveNodes.contains(node_name)) { + // node is not live, so this replica is actually down + replicaMap.put(ZkStateReader.STATE_PROP, ZkStateReader.DOWN); + } + } + } + } + } + } + /** * Get collection status from cluster state. * Can return collection status by given shard name.