diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 03f2f390c01..ecfeedc3495 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -91,6 +91,9 @@ New Features * SOLR-10373: Implement read API for autoscaling configuration at /admin/autoscaling or /cluster/autoscaling paths. (shalin) +* SOLR-10677: Expose a diagnostics API to return nodes sorted by load in descending order and + any policy violations. (shalin) + Bug Fixes ---------------------- * SOLR-9262: Connection and read timeouts are being ignored by UpdateShardHandler after SOLR-4509. diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScalingHandler.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScalingHandler.java index 701d499060b..7617f1334b3 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScalingHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScalingHandler.java @@ -17,6 +17,7 @@ package org.apache.solr.cloud.autoscaling; +import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; @@ -33,6 +34,8 @@ import java.util.concurrent.TimeUnit; import com.google.common.collect.ImmutableSet; import org.apache.solr.api.Api; import org.apache.solr.api.ApiBag; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.SolrClientDataProvider; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; @@ -83,58 +86,98 @@ public class AutoScalingHandler extends RequestHandlerBase implements Permission @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { - String httpMethod = (String) req.getContext().get("httpMethod"); - RequestHandlerUtils.setWt(req, JSON); + try { + String httpMethod = (String) req.getContext().get("httpMethod"); + RequestHandlerUtils.setWt(req, JSON); - if ("GET".equals(httpMethod)) { - Map map = zkReadAutoScalingConf(container.getZkController().getZkStateReader()); - rsp.getValues().addAll(map); - RequestHandlerUtils.addExperimentalFormatWarning(rsp); - } else { - if (req.getContentStreams() == null) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No commands specified for autoscaling"); - } - List ops = CommandOperation.readCommands(req.getContentStreams(), rsp, singletonCommands); - if (ops == null) { - // errors have already been added to the response so there's nothing left to do - return; - } - for (CommandOperation op : ops) { - switch (op.name) { - case "set-trigger": - handleSetTrigger(req, rsp, op); - break; - case "remove-trigger": - handleRemoveTrigger(req, rsp, op); - break; - case "set-listener": - handleSetListener(req, rsp, op); - break; - case "remove-listener": - handleRemoveListener(req, rsp, op); - break; - case "suspend-trigger": - handleSuspendTrigger(req, rsp, op); - break; - case "resume-trigger": - handleResumeTrigger(req, rsp, op); - break; - case "set-policy": - handleSetPolicies(req, rsp, op); - break; - case "remove-policy": - handleRemovePolicy(req, rsp, op); - break; - case "set-cluster-preferences": - handleSetClusterPreferences(req, rsp, op); - break; - case "set-cluster-policy": - handleSetClusterPolicy(req, rsp, op); - break; - default: - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown command: " + op.name); + if ("GET".equals(httpMethod)) { + Map map = zkReadAutoScalingConf(container.getZkController().getZkStateReader()); + rsp.getValues().addAll(map); + if (req.getParams().getBool("diagnostics", false)) { + handleDiagnostics(rsp, map); + } + } else { + if (req.getContentStreams() == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No commands specified for autoscaling"); + } + List ops = CommandOperation.readCommands(req.getContentStreams(), rsp, singletonCommands); + if (ops == null) { + // errors have already been added to the response so there's nothing left to do + return; + } + for (CommandOperation op : ops) { + switch (op.name) { + case "set-trigger": + handleSetTrigger(req, rsp, op); + break; + case "remove-trigger": + handleRemoveTrigger(req, rsp, op); + break; + case "set-listener": + handleSetListener(req, rsp, op); + break; + case "remove-listener": + handleRemoveListener(req, rsp, op); + break; + case "suspend-trigger": + handleSuspendTrigger(req, rsp, op); + break; + case "resume-trigger": + handleResumeTrigger(req, rsp, op); + break; + case "set-policy": + handleSetPolicies(req, rsp, op); + break; + case "remove-policy": + handleRemovePolicy(req, rsp, op); + break; + case "set-cluster-preferences": + handleSetClusterPreferences(req, rsp, op); + break; + case "set-cluster-policy": + handleSetClusterPolicy(req, rsp, op); + break; + default: + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown command: " + op.name); + } } } + } finally { + RequestHandlerUtils.addExperimentalFormatWarning(rsp); + } + } + + private void handleDiagnostics(SolrQueryResponse rsp, Map autoScalingConf) throws IOException { + Policy policy = new Policy(autoScalingConf); + try (CloudSolrClient build = new CloudSolrClient.Builder() + .withHttpClient(container.getUpdateShardHandler().getHttpClient()) + .withZkHost(container.getZkController().getZkServerAddress()).build()) { + Policy.Session session = policy.createSession(new SolrClientDataProvider(build)); + List sorted = session.getSorted(); + List violations = session.getViolations(); + + List clusterPreferences = policy.getClusterPreferences(); + + List> sortedNodes = new ArrayList<>(sorted.size()); + for (Row row : sorted) { + Map map = Utils.makeMap("node", row.node); + for (Cell cell : row.cells) { + for (Preference clusterPreference : clusterPreferences) { + Policy.SortParam name = clusterPreference.name; + if (cell.name.equalsIgnoreCase(name.name())) { + map.put(name.name(), cell.val); + break; + } + } + } + sortedNodes.add(map); + } + + Map map = new HashMap<>(2); + map.put("sortedNodes", sortedNodes); + + map.put("violations", violations); + rsp.getValues().add("diagnostics", map); } } diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java index 81ef5ff414d..460fe643420 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java @@ -29,6 +29,8 @@ import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.cloud.ZkNodeProps; @@ -396,7 +398,7 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { " 'set-cluster-preferences': [" + " {'minimize': 'cores', 'precision': 3}," + " {'maximize': 'freedisk','precision': 100}," + - " {'minimize': 'cpu','precision': 10}]" + + " {'minimize': 'sysLoadAvg','precision': 10}]" + "}"; req = new AutoScalingRequest(SolrRequest.METHOD.POST, path, setPreferencesCommand); response = solrClient.request(req); @@ -409,7 +411,7 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { // set preferences setPreferencesCommand = "{" + " 'set-cluster-preferences': [" + - " {'minimize': 'cpu','precision': 10}]" + + " {'minimize': 'sysLoadAvg','precision': 10}]" + "}"; req = new AutoScalingRequest(SolrRequest.METHOD.POST, path, setPreferencesCommand); response = solrClient.request(req); @@ -466,7 +468,8 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { " 'set-cluster-preferences': [" + " {'minimize': 'cores', 'precision': 3}," + " {'maximize': 'freedisk','precision': 100}," + - " {'minimize': 'cpu','precision': 10}]" + + " {'minimize': 'sysLoadAvg','precision': 10}," + + " {'minimize': 'heapUsage','precision': 10}]" + "}"; req = new AutoScalingRequest(SolrRequest.METHOD.POST, path, setPreferencesCommand); response = solrClient.request(req); @@ -478,7 +481,6 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { " {'nodeRole':'!overseer', 'replica':'#ANY'}" + " ]," + " 'policy1':[" + - " {'cores':'<2', 'node':'#ANY'}," + " {'replica':'<2', 'shard': '#EACH', 'node': '#ANY'}" + " ]" + "}}"; @@ -486,7 +488,7 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { response = solrClient.request(req); assertEquals(response.get("result").toString(), "success"); - SolrQuery query = new SolrQuery().setParam(CommonParams.QT, path); + SolrQuery query = new SolrQuery().setParam(CommonParams.QT, path).setParam("diagnostics", true); QueryResponse queryResponse = solrClient.query(query); response = queryResponse.getResponse(); @@ -502,7 +504,7 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { List clusterPrefs = (List) response.get("cluster-preferences"); assertNotNull(clusterPrefs); - assertEquals(3, clusterPrefs.size()); + assertEquals(4, clusterPrefs.size()); List clusterPolicy = (List) response.get("cluster-policy"); assertNotNull(clusterPolicy); @@ -513,6 +515,81 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { assertEquals(2, policies.size()); assertNotNull(policies.get("xyz")); assertNotNull(policies.get("policy1")); + + Map diagnostics = (Map) response.get("diagnostics"); + List sortedNodes = (List) diagnostics.get("sortedNodes"); + assertNotNull(sortedNodes); + + assertEquals(2, sortedNodes.size()); + String[] sortedNodeNames = new String[2]; + for (int i = 0; i < 2; i++) { + Map node = (Map) sortedNodes.get(i); + assertNotNull(node); + assertEquals(5, node.size()); + assertNotNull(sortedNodeNames[i] = (String) node.get("node")); + assertNotNull(node.get("cores")); + assertEquals(0, node.get("cores")); + assertNotNull(node.get("freedisk")); + assertNotNull(node.get("sysLoadAvg")); + assertNotNull(node.get("heapUsage")); + } + + List> violations = (List>) diagnostics.get("violations"); + assertNotNull(violations); + assertEquals(0, violations.size()); + + // lets create a collection and ensure that its details show up in the diagnostics output + CollectionAdminRequest.Create create = CollectionAdminRequest.Create.createCollection("readApiTest", 1, 2); + CollectionAdminResponse adminResponse = create.process(solrClient); + assertTrue(adminResponse.isSuccess()); + + // get the diagnostics output again + queryResponse = solrClient.query(query); + response = queryResponse.getResponse(); + diagnostics = (Map) response.get("diagnostics"); + sortedNodes = (List) diagnostics.get("sortedNodes"); + assertNotNull(sortedNodes); + + assertEquals(2, sortedNodes.size()); + for (int i = 0; i < 2; i++) { + Map node = (Map) sortedNodes.get(i); + assertNotNull(node); + assertEquals(5, node.size()); + assertNotNull(node.get("node")); + assertEquals(sortedNodeNames[i], node.get("node")); + assertNotNull(node.get("cores")); + assertEquals(1, node.get("cores")); + assertNotNull(node.get("freedisk")); + assertNotNull(node.get("sysLoadAvg")); + assertNotNull(node.get("heapUsage")); + } + + violations = (List>) diagnostics.get("violations"); + assertNotNull(violations); + assertEquals(0, violations.size()); + + // lets create a collection which violates the rule replicas < 2 + create = CollectionAdminRequest.Create.createCollection("readApiTestViolations", 1, 6); + create.setMaxShardsPerNode(10); + adminResponse = create.process(solrClient); + assertTrue(adminResponse.isSuccess()); + + // get the diagnostics output again + queryResponse = solrClient.query(query); + response = queryResponse.getResponse(); + diagnostics = (Map) response.get("diagnostics"); + sortedNodes = (List) diagnostics.get("sortedNodes"); + assertNotNull(sortedNodes); + + violations = (List>) diagnostics.get("violations"); + assertNotNull(violations); + assertEquals(2, violations.size()); + for (Map violation : violations) { + assertEquals("readApiTestViolations", violation.get("collection")); + assertEquals("shard1", violation.get("shard")); + assertEquals(Utils.makeMap("replica", "3"), violation.get("violation")); + assertNotNull(violation.get("clause")); + } } static class AutoScalingRequest extends SolrRequest { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrClientDataProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrClientDataProvider.java index 03a652d6d98..0e5e3d17436 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrClientDataProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrClientDataProvider.java @@ -167,7 +167,7 @@ public class SolrClientDataProvider implements ClusterDataProvider, MapWriter { @Override protected void getRemoteInfo(String solrNode, Set requestedTags, SnitchContext ctx) { ClientSnitchCtx snitchContext = (ClientSnitchCtx) ctx; - List groups = new ArrayList<>(); + Set groups = new HashSet<>(); List prefixes = new ArrayList<>(); if (requestedTags.contains(DISK)) { groups.add("solr.node"); @@ -177,6 +177,14 @@ public class SolrClientDataProvider implements ClusterDataProvider, MapWriter { groups.add("solr.core"); prefixes.add("CORE.coreName"); } + if (requestedTags.contains(SYSLOADAVG)) { + groups.add("solr.jvm"); + prefixes.add("os.systemLoadAverage"); + } + if (requestedTags.contains(HEAPUSAGE)) { + groups.add("solr.jvm"); + prefixes.add("memory.heap.usage"); + } if(groups.isEmpty() || prefixes.isEmpty()) return; ModifiableSolrParams params = new ModifiableSolrParams(); @@ -188,7 +196,7 @@ public class SolrClientDataProvider implements ClusterDataProvider, MapWriter { Map m = rsp.nl.asMap(4); if(requestedTags.contains(DISK)){ Number n = (Number) Utils.getObjectByPath(m,true, "metrics/solr.node/CONTAINER.fs.usableSpace/value"); - if(n != null) ctx.getTags().put(DISK, n.longValue()); + if(n != null) ctx.getTags().put(DISK, n.doubleValue() / 1024.0d / 1024.0d / 1024.0d); } if(requestedTags.contains(CORES)){ int count = 0; @@ -198,7 +206,14 @@ public class SolrClientDataProvider implements ClusterDataProvider, MapWriter { } ctx.getTags().put(CORES, count); } - + if (requestedTags.contains(SYSLOADAVG)) { + Number n = (Number) Utils.getObjectByPath(m, true, "metrics/solr.jvm/os.systemLoadAverage/value"); + if (n != null) ctx.getTags().put(SYSLOADAVG, n.doubleValue() * 100.0d); + } + if (requestedTags.contains(HEAPUSAGE)) { + Number n = (Number) Utils.getObjectByPath(m, true, "metrics/solr.jvm/memory.heap.usage/value"); + if (n != null) ctx.getTags().put(HEAPUSAGE, n.doubleValue() * 100.0d); + } } catch (Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "", e); } diff --git a/solr/solrj/src/java/org/apache/solr/cloud/autoscaling/Policy.java b/solr/solrj/src/java/org/apache/solr/cloud/autoscaling/Policy.java index 11a6250c54b..de82fd59391 100644 --- a/solr/solrj/src/java/org/apache/solr/cloud/autoscaling/Policy.java +++ b/solr/solrj/src/java/org/apache/solr/cloud/autoscaling/Policy.java @@ -100,6 +100,10 @@ public class Policy implements MapWriter { return clusterPolicy; } + public List getClusterPreferences() { + return clusterPreferences; + } + @Override public void writeMap(EntryWriter ew) throws IOException { if (!policies.isEmpty()) { @@ -241,7 +245,7 @@ public class Policy implements MapWriter { } enum SortParam { - freedisk, cores, heap, cpu; + freedisk, cores, heapUsage, sysLoadAvg; static SortParam get(String m) { for (SortParam p : values()) if (p.name().equals(m)) return p; diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/rule/ImplicitSnitch.java b/solr/solrj/src/java/org/apache/solr/common/cloud/rule/ImplicitSnitch.java index d55df399064..8e596dc3d6e 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/rule/ImplicitSnitch.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/rule/ImplicitSnitch.java @@ -48,6 +48,8 @@ public class ImplicitSnitch extends Snitch { public static final String ROLE = "role"; public static final String NODEROLE = "nodeRole"; public static final String SYSPROP = "sysprop."; + public static final String SYSLOADAVG = "sysLoadAvg"; + public static final String HEAPUSAGE = "heapUsage"; public static final List IP_SNITCHES = Collections.unmodifiableList(Arrays.asList("ip_1", "ip_2", "ip_3", "ip_4")); public static final Set tags = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(NODE, PORT, HOST, CORES, DISK, ROLE, "ip_1", "ip_2", "ip_3", "ip_4"))); diff --git a/solr/solrj/src/test/org/apache/solr/cloud/autoscaling/TestPolicy.java b/solr/solrj/src/test/org/apache/solr/cloud/autoscaling/TestPolicy.java index 5f49bea4973..36c18e01059 100644 --- a/solr/solrj/src/test/org/apache/solr/cloud/autoscaling/TestPolicy.java +++ b/solr/solrj/src/test/org/apache/solr/cloud/autoscaling/TestPolicy.java @@ -166,13 +166,13 @@ public class TestPolicy extends SolrTestCaseJ4 { " cluster-preferences:[" + "{minimize:cores , precision:2}," + "{maximize:freedisk, precision:50}, " + - "{minimize:heap, precision:1000}]}"; + "{minimize:heapUsage, precision:1000}]}"; Map nodeValues = (Map) Utils.fromJSONString("{" + - "node1:{cores:12, freedisk: 334, heap:10480}," + - "node2:{cores:4, freedisk: 749, heap:6873}," + - "node3:{cores:7, freedisk: 262, heap:7834}," + - "node4:{cores:8, freedisk: 375, heap:16900, nodeRole:overseer}" + + "node1:{cores:12, freedisk: 334, heapUsage:10480}," + + "node2:{cores:4, freedisk: 749, heapUsage:6873}," + + "node3:{cores:7, freedisk: 262, heapUsage:7834}," + + "node4:{cores:8, freedisk: 375, heapUsage:16900, nodeRole:overseer}" + "}"); Policy policy = new Policy((Map) Utils.fromJSONString(rules)); @@ -199,11 +199,11 @@ public class TestPolicy extends SolrTestCaseJ4 { assertEquals("node2", operation.get("node")); nodeValues = (Map) Utils.fromJSONString("{" + - "node1:{cores:12, freedisk: 334, heap:10480}," + - "node2:{cores:4, freedisk: 749, heap:6873}," + - "node3:{cores:7, freedisk: 262, heap:7834}," + - "node5:{cores:0, freedisk: 895, heap:17834}," + - "node4:{cores:8, freedisk: 375, heap:16900, nodeRole:overseer}" + + "node1:{cores:12, freedisk: 334, heapUsage:10480}," + + "node2:{cores:4, freedisk: 749, heapUsage:6873}," + + "node3:{cores:7, freedisk: 262, heapUsage:7834}," + + "node5:{cores:0, freedisk: 895, heapUsage:17834}," + + "node4:{cores:8, freedisk: 375, heapUsage:16900, nodeRole:overseer}" + "}"); session = policy.createSession(getClusterDataProvider(nodeValues, clusterState)); SolrRequest opReq = session.getSuggester(MOVEREPLICA) @@ -282,7 +282,7 @@ public class TestPolicy extends SolrTestCaseJ4 { "'cluster-preferences':[" + "{'minimize':'cores','precision':2}," + "{'maximize':'freedisk','precision':50}," + - "{'minimize':'heap','precision':1000}" + + "{'minimize':'heapUsage','precision':1000}" + "]," + "'cluster-policy':[" + "{'nodeRole':'!overseer','strict':false}," + @@ -300,10 +300,10 @@ public class TestPolicy extends SolrTestCaseJ4 { "}"; Map nodeValues = (Map) Utils.fromJSONString("{" + - "node1:{cores:12, freedisk: 334, heap:10480, rack: rack4}," + - "node2:{cores:4, freedisk: 749, heap:6873, rack: rack3}," + - "node3:{cores:7, freedisk: 262, heap:7834, rack: rack2}," + - "node4:{cores:8, freedisk: 375, heap:16900, nodeRole:overseer, rack: rack1}" + + "node1:{cores:12, freedisk: 334, heapUsage:10480, rack: rack4}," + + "node2:{cores:4, freedisk: 749, heapUsage:6873, rack: rack3}," + + "node3:{cores:7, freedisk: 262, heapUsage:7834, rack: rack2}," + + "node4:{cores:8, freedisk: 375, heapUsage:16900, nodeRole:overseer, rack: rack1}" + "}"); Policy policy = new Policy((Map) Utils.fromJSONString(rules)); ClusterDataProvider clusterDataProvider = getClusterDataProvider(nodeValues, clusterState);