From d32ce81eab69239b03f4f1b4974aa4a1b19fcd06 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Thu, 24 May 2018 01:26:50 +1000 Subject: [PATCH] SOLR-12358: Autoscaling suggestions fail randomly with sorting --- solr/CHANGES.txt | 2 + .../autoscaling/AutoScalingHandlerTest.java | 4 +- .../solrj/cloud/autoscaling/Policy.java | 32 +- .../solrj/cloud/autoscaling/PolicyHelper.java | 1 + .../solrj/cloud/autoscaling/Preference.java | 3 - .../client/solrj/cloud/autoscaling/Row.java | 10 +- .../solrj/cloud/autoscaling/TestPolicy.java | 310 +++++++++++++++++- 7 files changed, 349 insertions(+), 13 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5be0a936115..9ec2de39026 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -233,6 +233,8 @@ Bug Fixes * SOLR-3567: Spellcheck custom parameters not being passed through due to wrong prefix creation. (Josh Lucas via shalin) +* SOLR-12358: Autoscaling suggestions fail randomly with sorting (noble) + Optimizations ---------------------- diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java index 2aec88e0b3e..483b60c14ff 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java @@ -757,14 +757,14 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { response = solrClient.request(req); Map diagnostics = (Map) response.get("diagnostics"); - List sortedNodes = (List) diagnostics.get("sortedNodes"); + List sortedNodes = (List) Utils.getObjectByPath(response, false, "diagnostics/sortedNodes"); assertNotNull(sortedNodes); assertEquals(2, sortedNodes.size()); for (int i = 0; i < 2; i++) { Map node = (Map) sortedNodes.get(i); assertNotNull(node); - assertEquals(5, node.size()); + assertEquals(6, node.size()); assertNotNull(node.get("node")); assertNotNull(node.get("cores")); assertEquals(0L, node.get("cores")); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java index 05c9c205ab9..fb01cc5e962 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Policy.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -359,14 +360,35 @@ public class Policy implements MapWriter { } static void setApproxValuesAndSortNodes(List clusterPreferences, List matrix) { + List deadNodes = null; + Iterator it =matrix.iterator(); + while (it.hasNext()){ + Row row = it.next(); + if(!row.isLive){ + if(deadNodes == null) deadNodes = new ArrayList<>(); + deadNodes.add(row); + it.remove(); + } + } + if (!clusterPreferences.isEmpty()) { //this is to set the approximate value according to the precision ArrayList tmpMatrix = new ArrayList<>(matrix); + Row[] lastComparison = new Row[2]; for (Preference p : clusterPreferences) { try { - tmpMatrix.sort((r1, r2) -> p.compare(r1, r2, false)); + tmpMatrix.sort((r1, r2) -> { + lastComparison[0] = r1; + lastComparison[1] = r2; + return p.compare(r1, r2, false); + }); } catch (Exception e) { - LOG.error("Exception! prefs = {}, matrix = {}", clusterPreferences, matrix); + LOG.error("Exception! prefs = {}, recent r1 = {}, r2 = {}, compare : {} matrix = {}", + clusterPreferences, + lastComparison[0].node, + lastComparison[1].node, + p.compare(lastComparison[0],lastComparison[1], false ), + Utils.toJSONString(Utils.getDeepCopy(tmpMatrix, 6, false))); throw e; } p.setApproxVal(tmpMatrix); @@ -378,6 +400,12 @@ public class Policy implements MapWriter { if (result == 0) result = clusterPreferences.get(0).compare(r1, r2, false); return result; }); + + if(deadNodes != null){ + for (Row deadNode : deadNodes) { + matrix.add(0, deadNode); + } + } } } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java index a4539ec4a05..7785fda5b4b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/PolicyHelper.java @@ -190,6 +190,7 @@ public class PolicyHelper { List> sortedNodes = new ArrayList<>(sorted.size()); for (Row row : sorted) { Map map = Utils.makeMap("node", row.node); + map.put("isLive", row.isLive); for (Cell cell : row.getCells()) { for (Preference clusterPreference : clusterPreferences) { Policy.SortParam name = clusterPreference.getName(); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Preference.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Preference.java index 3e73632bf39..0db9886ae7c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Preference.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Preference.java @@ -63,9 +63,6 @@ public class Preference implements MapWriter { // recursive, it uses the precision to tie & when there is a tie use the next preference to compare // in non-recursive mode, precision is not taken into consideration and sort is done on actual value int compare(Row r1, Row r2, boolean useApprox) { - if (!r1.isLive && !r2.isLive) return 0; - if (!r1.isLive) return -1; - if (!r2.isLive) return 1; Object o1 = useApprox ? r1.cells[idx].approxVal : r1.cells[idx].val; Object o2 = useApprox ? r2.cells[idx].approxVal : r2.cells[idx].val; int result = 0; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Row.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Row.java index 659f315c120..f1799c4199f 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Row.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/Row.java @@ -19,6 +19,7 @@ package org.apache.solr.client.solrj.cloud.autoscaling; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -27,7 +28,6 @@ import java.util.Random; import java.util.function.Consumer; import java.util.stream.Collectors; -import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.MapWriter; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.ZkStateReader; @@ -81,10 +81,10 @@ public class Row implements MapWriter { @Override public void writeMap(EntryWriter ew) throws IOException { - ew.put(node, (IteratorWriter) iw -> { - iw.add((MapWriter) e -> e.put("replicas", collectionVsShardVsReplicas)); - for (Cell cell : cells) iw.add(cell); - }); + ew.put(NODE, node); + ew.put("replicas", collectionVsShardVsReplicas); + ew.put("isLive", isLive); + ew.put("attributes", Arrays.asList(cells)); } Row copy(Policy.Session session) { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/cloud/autoscaling/TestPolicy.java b/solr/solrj/src/test/org/apache/solr/client/solrj/cloud/autoscaling/TestPolicy.java index a53b60cc40b..a099171eab3 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/cloud/autoscaling/TestPolicy.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/cloud/autoscaling/TestPolicy.java @@ -2160,5 +2160,313 @@ public void testUtilizeNodeFailure2() throws Exception { assertEquals("count = "+count ,1,count); } - + //SOLR-12358 + public void testSortError() { + Policy policy = new Policy((Map) Utils.fromJSONString("{cluster-preferences: [{minimize : cores, precision:1}, " + + "{maximize : freedisk, precision: 50}, " + + "{minimize: sysLoadAvg}]}")); + String rowsData = "{'sortedNodes':[" + + " {" + + " 'node':'solr-01:8983_solr'," + + " 'replicas':{}," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':2}," + + " {'freedisk':1734.5261459350586}," + + " {'sysLoadAvg':35.0}," + + " {'node':'solr-01:8983_solr'}]}," + + " {" + + " 'node':'solr-07:8983_solr'," + + " 'replicas':{}," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1721.5669250488281}," + + " {'sysLoadAvg':10.0}," + + " {'node':'solr-07:8983_solr'}]}," + + " {" + + " 'node':'solr-08:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1764.9518203735352}," + + " {'sysLoadAvg':330.0}," + + " {'node':'solr-08:8983_solr'}]}," + + " {" + + " 'node':'solr-25:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1779.7792778015137}," + + " {'sysLoadAvg':304.0}," + + " {'node':'solr-25:8983_solr'}]}," + + " {" + + " 'node':'solr-15:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1697.5930519104004}," + + " {'sysLoadAvg':277.0}," + + " {'node':'solr-15:8983_solr'}]}," + + " {" + + " 'node':'solr-13:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':2}," + + " {'freedisk':1755.1909484863281}," + + " {'sysLoadAvg':265.0}," + + " {'node':'solr-13:8983_solr'}]}," + + " {" + + " 'node':'solr-14:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1757.6035423278809}," + + " {'sysLoadAvg':61.0}," + + " {'node':'solr-14:8983_solr'}]}," + + " {" + + " 'node':'solr-16:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1746.081386566162}," + + " {'sysLoadAvg':260.0}," + + " {'node':'solr-16:8983_solr'}]}," + + " {" + + " 'node':'solr-04:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':2}," + + " {'freedisk':1708.7230529785156}," + + " {'sysLoadAvg':216.0}," + + " {'node':'solr-04:8983_solr'}]}," + + " {" + + " 'node':'solr-06:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1688.3182678222656}," + + " {'sysLoadAvg':385.0}," + + " {'node':'solr-06:8983_solr'}]}," + + " {" + + " 'node':'solr-02:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':6}," + + " {'freedisk':1778.226963043213}," + + " {'sysLoadAvg':369.0}," + + " {'node':'solr-02:8983_solr'}]}," + + " {" + + " 'node':'solr-05:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1741.9401931762695}," + + " {'sysLoadAvg':354.0}," + + " {'node':'solr-05:8983_solr'}]}," + + " {" + + " 'node':'solr-23:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1718.854579925537}," + + " {'sysLoadAvg':329.0}," + + " {'node':'solr-23:8983_solr'}]}," + + " {" + + " 'node':'solr-24:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1733.6669311523438}," + + " {'sysLoadAvg':327.0}," + + " {'node':'solr-24:8983_solr'}]}," + + " {" + + " 'node':'solr-09:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1714.6191711425781}," + + " {'sysLoadAvg':278.0}," + + " {'node':'solr-09:8983_solr'}]}," + + " {" + + " 'node':'solr-10:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1755.3038482666016}," + + " {'sysLoadAvg':266.0}," + + " {'node':'solr-10:8983_solr'}]}," + + " {" + + " 'node':'solr-28:8983_solr'," + + " 'isLive':false," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1691.3830909729004}," + + " {'sysLoadAvg':261.0}," + + " {'node':'solr-28:8983_solr'}]}," + + " {" + + " 'node':'solr-29:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':2}," + + " {'freedisk':1706.797966003418}," + + " {'sysLoadAvg':252.99999999999997}," + + " {'node':'solr-29:8983_solr'}]}," + + " {" + + " 'node':'solr-32:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1762.432300567627}," + + " {'sysLoadAvg':221.0}," + + " {'node':'solr-32:8983_solr'}]}," + + " {" + + " 'node':'solr-21:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1760.9801979064941}," + + " {'sysLoadAvg':213.0}," + + " {'node':'solr-21:8983_solr'}]}," + + " {" + + " 'node':'solr-22:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1780.5297241210938}," + + " {'sysLoadAvg':209.0}," + + " {'node':'solr-22:8983_solr'}]}," + + " {" + + " 'node':'solr-31:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1700.1481628417969}," + + " {'sysLoadAvg':211.0}," + + " {'node':'solr-31:8983_solr'}]}," + + " {" + + " 'node':'solr-33:8983_solr'," + + " 'isLive':false," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1748.1132926940918}," + + " {'sysLoadAvg':199.0}," + + " {'node':'solr-33:8983_solr'}]}," + + " {" + + " 'node':'solr-36:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1776.197639465332}," + + " {'sysLoadAvg':193.0}," + + " {'node':'solr-36:8983_solr'}]}," + + " {" + + " 'node':'solr-35:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1746.7729606628418}," + + " {'sysLoadAvg':191.0}," + + " {'node':'solr-35:8983_solr'}]}," + + " {" + + " 'node':'solr-12:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1713.287540435791}," + + " {'sysLoadAvg':175.0}," + + " {'node':'solr-12:8983_solr'}]}," + + " {" + + " 'node':'solr-11:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1736.784511566162}," + + " {'sysLoadAvg':169.0}," + + " {'node':'solr-11:8983_solr'}]}," + + " {" + + " 'node':'solr-35:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1766.9416885375977}," + + " {'sysLoadAvg':155.0}," + + " {'node':'solr-35:8983_solr'}]}," + + " {" + + " 'node':'solr-17:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1764.3425407409668}," + + " {'sysLoadAvg':139.0}," + + " {'node':'solr-17:8983_solr'}]}," + + " {" + + " 'node':'solr-18:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':2}," + + " {'freedisk':1757.0613975524902}," + + " {'sysLoadAvg':132.0}," + + " {'node':'solr-18:8983_solr'}]}," + + " {" + + " 'node':'solr-20:8983_solr'," + + " 'isLive':false," + + " 'attributes':[" + + " {'cores':3}," + + " {'freedisk':1747.4205322265625}," + + " {'sysLoadAvg':126.0}," + + " {'node':'solr-20:8983_solr'}]}," + + " {" + + " 'node':'solr-27:8983_solr'," + + " 'isLive':true," + + " 'attributes':[" + + " {'cores':4}," + + " {'freedisk':1721.0442085266113}," + + " {'sysLoadAvg':118.0}," + + " {'node':'solr-27:8983_solr'}]}]}"; + + List l = (List) ((Map) Utils.fromJSONString(rowsData)).get("sortedNodes"); + List params = new ArrayList<>(); + params.add(Suggestion.ConditionType.CORES); + params.add(Suggestion.ConditionType.FREEDISK); + params.add(Suggestion.ConditionType.SYSLOADAVG); + params.add(Suggestion.ConditionType.NODE); + List rows = new ArrayList<>(); + for (Object o : l) { + Map m = (Map) o; + Cell[] c = new Cell[params.size()]; + List attrs = (List) m.get("attributes"); + for (int i = 0; i < params.size(); i++) { + Suggestion.ConditionType param = params.get(i); + for (Object attr : attrs) { + Object o1 = ((Map) attr).get(param.tagName); + if (o1 != null) { + o1 = param.validate(param.tagName, o1, false); + c[i] = new Cell(i, param.tagName, o1, o1, param, null); + } + } + } + rows.add(new Row((String) m.get("node"), c, false, + new HashMap<>(), + (Boolean) m.get("isLive"), null)); + } + int deadNodes = 0; + for (Row row : rows) { + if (!row.isLive) deadNodes++; + } + + Policy.setApproxValuesAndSortNodes(policy.clusterPreferences, rows); + + for (int i = 0; i < deadNodes; i++) { + assertFalse(rows.get(i).isLive); + } + + for (int i = deadNodes; i < rows.size(); i++) { + assertTrue(rows.get(i).isLive); + } + + + } + + }