From f1173aaa486a32c5145f10db2bdd131103c4c639 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 30 Jan 2020 23:29:39 +0100 Subject: [PATCH] SQL: Add optimisations for not-equalities (#51088) (#51700) * Optimize not-equalities in con-/disjunctions This commit adds optimisations of not-equalities in conjunctions and disjunctions: * for conjunctions, the not-equality can be optimized away when applied together with a range or inequality, in case the not-equality point falls outside the domain of the later condition; if its on the boarder, it will modify the bound, to simply exclude the equality, if present; otherwise no optimisation can be applied; * for disjunctions, the not-equals could filter away the ranges and inequalities, unless these include an equality on the bound, in which case the entire condition becomes always true, but this would influence the score() function, so it's been omitted; * fix aggregations of inequalities in ranges This commit fixes the loop that aggregates inequalities into ranges: - it won't advance the outer loop index in case of a merge, since the current element is removed; - it will break the inner loop, since comparision against the element selected in the outer loop can't continue, as it had been removed. (cherry picked from commit 789724ac2cc726de603849b4eeb8194da7528bcc) --- .../xpack/sql/optimizer/Optimizer.java | 152 ++++++++++++-- .../xpack/sql/optimizer/OptimizerTests.java | 193 ++++++++++++++++++ 2 files changed, 325 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java index 610989c49d6..ee9de2e1c9a 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java @@ -1083,10 +1083,10 @@ public class Optimizer extends RuleExecutor { if (eq.left().semanticEquals(neq.left())) { Integer comp = BinaryComparison.compare(eqValue, neq.right().fold()); if (comp != null) { - if (comp == 0) { - return FALSE; // clashing and conflicting: a = 1 AND a != 1 - } else { - iter.remove(); // clashing and redundant: a = 1 AND a != 2 + if (comp == 0) { // clashing and conflicting: a = 1 AND a != 1 + return new Literal(and.source(), Boolean.FALSE, DataTypes.BOOLEAN); + } else { // clashing and redundant: a = 1 AND a != 2 + iter.remove(); changed = true; } } @@ -1102,12 +1102,12 @@ public class Optimizer extends RuleExecutor { if (bc instanceof LessThan || bc instanceof LessThanOrEqual) { // a = 2 AND a />= ? if ((compare == 0 && bc instanceof GreaterThan) || // a = 2 AND a > 2 compare < 0) { // a = 2 AND a >/>= 3 - return FALSE; + return new Literal(and.source(), Boolean.FALSE, DataTypes.BOOLEAN); } } @@ -1290,14 +1290,20 @@ public class Optimizer extends RuleExecutor { boolean changed = false; List andExps = Predicates.splitAnd(and); - // Ranges need to show up before BinaryComparisons in list, to allow the latter be optimized away into a Range, if possible + // Ranges need to show up before BinaryComparisons in list, to allow the latter be optimized away into a Range, if possible. + // NotEquals need to be last in list, to have a complete set of Ranges (ranges) and BinaryComparisons (bcs) and allow these to + // optimize the NotEquals away. andExps.sort((o1, o2) -> { if (o1 instanceof Range && o2 instanceof Range) { return 0; // keep ranges' order } else if (o1 instanceof Range || o2 instanceof Range) { - return o2 instanceof Range ? 1 : -1; + return o2 instanceof Range ? 1 : -1; // push Ranges down + } else if (o1 instanceof NotEquals && o2 instanceof NotEquals) { + return 0; // keep NotEquals' order + } else if (o1 instanceof NotEquals || o2 instanceof NotEquals) { + return o1 instanceof NotEquals ? 1 : -1; // push NotEquals up } else { - return 0; // keep non-ranges' order + return 0; // keep non-Ranges' and non-NotEquals' order } }); for (Expression ex : andExps) { @@ -1308,7 +1314,7 @@ public class Optimizer extends RuleExecutor { } else { ranges.add(r); } - } else if (ex instanceof BinaryComparison && !(ex instanceof Equals)) { + } else if (ex instanceof BinaryComparison && !(ex instanceof Equals || ex instanceof NotEquals)) { BinaryComparison bc = (BinaryComparison) ex; if (bc.right().foldable() && (findConjunctiveComparisonInRange(bc, ranges) || findExistingComparison(bc, bcs, true))) { @@ -1316,6 +1322,14 @@ public class Optimizer extends RuleExecutor { } else { bcs.add(bc); } + } else if (ex instanceof NotEquals) { + NotEquals neq = (NotEquals) ex; + if (neq.right().foldable() && notEqualsIsRemovableFromConjunction(neq, ranges, bcs)) { + // the non-equality can simply be dropped: either superfluous or has been merged with an updated range/inequality + changed = true; + } else { // not foldable OR not overlapping + exps.add(ex); + } } else { exps.add(ex); } @@ -1324,7 +1338,7 @@ public class Optimizer extends RuleExecutor { // finally try combining any left BinaryComparisons into possible Ranges // this could be a different rule but it's clearer here wrt the order of comparisons - for (int i = 0; i < bcs.size() - 1; i++) { + for (int i = 0, step = 1; i < bcs.size() - 1; i += step, step = 1) { BinaryComparison main = bcs.get(i); for (int j = i + 1; j < bcs.size(); j++) { @@ -1333,27 +1347,31 @@ public class Optimizer extends RuleExecutor { if (main.left().semanticEquals(other.left())) { // >/>= AND />= else if ((other instanceof GreaterThan || other instanceof GreaterThanOrEqual) - && (main instanceof LessThan || main instanceof LessThanOrEqual)) { - bcs.remove(j); - bcs.remove(i); + && (main instanceof LessThan || main instanceof LessThanOrEqual)) { + bcs.remove(j); + bcs.remove(i); ranges.add(new Range(and.source(), main.left(), - other.right(), other instanceof GreaterThanOrEqual, - main.right(), main instanceof LessThanOrEqual)); + other.right(), other instanceof GreaterThanOrEqual, + main.right(), main instanceof LessThanOrEqual)); - changed = true; + changed = true; + step = 0; + break; } } } @@ -1643,6 +1661,100 @@ public class Optimizer extends RuleExecutor { return false; } + + private static boolean notEqualsIsRemovableFromConjunction(NotEquals notEquals, List ranges, List bcs) { + Object neqVal = notEquals.right().fold(); + Integer comp; + + // check on "condition-overlapping" ranges: + // a != 2 AND 3 < a < 5 -> 3 < a < 5; a != 2 AND 0 < a < 1 -> 0 < a < 1 (discard NotEquals) + // a != 2 AND 2 <= a < 3 -> 2 < a < 3; a != 3 AND 2 < a <= 3 -> 2 < a < 3 (discard NotEquals, plus update Range) + // a != 2 AND 1 < a < 3 -> nop (do nothing) + for (int i = 0; i < ranges.size(); i ++) { + Range range = ranges.get(i); + + if (notEquals.left().semanticEquals(range.value())) { + comp = range.lower().foldable() ? BinaryComparison.compare(neqVal, range.lower().fold()) : null; + if (comp != null) { + if (comp <= 0) { + if (comp == 0 && range.includeLower()) { // a != 2 AND 2 <= a < ? -> 2 < a < ? + ranges.set(i, new Range(range.source(), range.value(), range.lower(), false, range.upper(), + range.includeUpper())); + } + // else: !.includeLower() : a != 2 AND 2 < a < 3 -> 2 < a < 3; or: + // else: comp < 0 : a != 2 AND 3 < a < ? -> 3 < a < ? + + return true; + } else { // comp > 0 : a != 4 AND 2 < a < ? : can only remove NotEquals if outside the range + comp = range.upper().foldable() ? BinaryComparison.compare(neqVal, range.upper().fold()) : null; + if (comp != null && comp >= 0) { + if (comp == 0 && range.includeUpper()) { // a != 4 AND 2 < a <= 4 -> 2 < a < 4 + ranges.set(i, new Range(range.source(), range.value(), range.lower(), range.includeLower(), + range.upper(), false)); + } + // else: !.includeUpper() : a != 4 AND 2 < a < 4 -> 2 < a < 4 + // else: comp > 0 : a != 4 AND 2 < a < 3 -> 2 < a < 3 + + return true; + } + // else: comp < 0 : a != 4 AND 2 < a < 5 -> nop; or: + // else: comp == null : upper bound not comparable -> nop + } + } // else: comp == null : lower bound not comparable: evaluate upper bound, in case non-equality value is ">=" + + comp = range.upper().foldable() ? BinaryComparison.compare(neqVal, range.upper().fold()) : null; + if (comp != null && comp >= 0) { + if (comp == 0 && range.includeUpper()) { // a != 3 AND ?? < a <= 3 -> ?? < a < 3 + ranges.set(i, new Range(range.source(), range.value(), range.lower(), range.includeLower(), range.upper(), + false)); + } + // else: !.includeUpper() : a != 3 AND ?? < a < 3 -> ?? < a < 3 + // else: comp > 0 : a != 3 and ?? < a < 2 -> ?? < a < 2 + + return true; + } + // else: comp < 0 : a != 3 AND ?? < a < 4 -> nop, as a decision can't be drawn; or: + // else: comp == null : a != 3 AND ?? < a < ?? -> nop + } + } + + // check on "condition-overlapping" inequalities: + // a != 2 AND a > 3 -> a > 3 (discard NotEquals) + // a != 2 AND a >= 2 -> a > 2 (discard NotEquals plus update inequality) + // a != 2 AND a > 1 -> nop (do nothing) + // + // a != 2 AND a < 3 -> nop + // a != 2 AND a <= 2 -> a < 2 + // a != 2 AND a < 1 -> a < 1 + for (int i = 0; i < bcs.size(); i ++) { + BinaryComparison bc = bcs.get(i); + + if (bc instanceof LessThan || bc instanceof LessThanOrEqual) { + comp = bc.right().foldable() ? BinaryComparison.compare(neqVal, bc.right().fold()) : null; + if (comp != null) { + if (comp >= 0) { + if (comp == 0 && bc instanceof LessThanOrEqual) { // a != 2 AND a <= 2 -> a < 2 + bcs.set(i, new LessThan(bc.source(), bc.left(), bc.right())); + } // else : comp > 0 (a != 2 AND a a a < 2) + return true; + } // else: comp < 0 : a != 2 AND a nop + } // else: non-comparable, nop + } else if (bc instanceof GreaterThan || bc instanceof GreaterThanOrEqual) { + comp = bc.right().foldable() ? BinaryComparison.compare(neqVal, bc.right().fold()) : null; + if (comp != null) { + if (comp <= 0) { + if (comp == 0 && bc instanceof GreaterThanOrEqual) { // a != 2 AND a >= 2 -> a > 2 + bcs.set(i, new GreaterThan(bc.source(), bc.left(), bc.right())); + } // else: comp < 0 (a != 2 AND a >/>= 3 -> a >/>= 3), or == 0 && bc i.of ">" (a != 2 AND a > 2 -> a > 2) + return true; + } // else: comp > 0 : a != 2 AND a >/>= 1 -> nop + } // else: non-comparable, nop + } // else: other non-relevant type + } + + return false; + } + } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java index f3e62ca4e69..2f911997de9 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java @@ -1128,6 +1128,32 @@ public class OptimizerTests extends ESTestCase { assertFalse(r.includeUpper()); } + // 1 < a AND a < 3 AND 2 < b AND b < 4 AND c < 4 -> (1 < a < 3) AND (2 < b < 4) AND c < 4 + public void testCombineMultipleComparisonsIntoRange() { + FieldAttribute fa = getFieldAttribute("a"); + FieldAttribute fb = getFieldAttribute("b"); + FieldAttribute fc = getFieldAttribute("c"); + + GreaterThan agt1 = new GreaterThan(EMPTY, fa, ONE); + LessThan alt3 = new LessThan(EMPTY, fa, THREE); + GreaterThan bgt2 = new GreaterThan(EMPTY, fb, TWO); + LessThan blt4 = new LessThan(EMPTY, fb, FOUR); + LessThan clt4 = new LessThan(EMPTY, fc, FOUR); + + Expression inputAnd = Predicates.combineAnd(Arrays.asList(agt1, alt3, bgt2, blt4, clt4)); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression outputAnd = rule.rule(inputAnd); + + Range agt1lt3 = new Range(EMPTY, fa, ONE, false, THREE, false); + Range bgt2lt4 = new Range(EMPTY, fb, TWO, false, FOUR, false); + + // The actual outcome is (c < 4) AND (1 < a < 3) AND (2 < b < 4), due to the way the Expression types are combined in the Optimizer + Expression expectedAnd = Predicates.combineAnd(Arrays.asList(clt4, agt1lt3, bgt2lt4)); + + assertTrue(outputAnd.semanticEquals(expectedAnd)); + } + // a != NULL AND a > 1 AND a < 5 AND a == 10 -> (a != NULL AND a == 10) AND 1 <= a < 5 public void testCombineUnbalancedComparisonsMixedWithEqualsIntoRange() { FieldAttribute fa = getFieldAttribute(); @@ -1247,6 +1273,173 @@ public class OptimizerTests extends ESTestCase { assertEquals(r1, exp); } + // a != 2 AND 3 < a < 5 -> 3 < a < 5 + public void testCombineBinaryComparisonsConjunction_Neq2AndRangeGt3Lt5() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + Range range = new Range(EMPTY, fa, THREE, false, FIVE, false); + And and = new And(EMPTY, range, neq); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(Range.class, exp.getClass()); + Range r = (Range) exp; + assertEquals(THREE, r.lower()); + assertFalse(r.includeLower()); + assertEquals(FIVE, r.upper()); + assertFalse(r.includeUpper()); + } + + // a != 2 AND 0 < a < 1 -> 0 < a < 1 + public void testCombineBinaryComparisonsConjunction_Neq2AndRangeGt0Lt1() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + Range range = new Range(EMPTY, fa, L(0), false, ONE, false); + And and = new And(EMPTY, neq, range); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(Range.class, exp.getClass()); + Range r = (Range) exp; + assertEquals(L(0), r.lower()); + assertFalse(r.includeLower()); + assertEquals(ONE, r.upper()); + assertFalse(r.includeUpper()); + } + + // a != 2 AND 2 <= a < 3 -> 2 < a < 3 + public void testCombineBinaryComparisonsConjunction_Neq2AndRangeGte2Lt3() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + Range range = new Range(EMPTY, fa, TWO, true, THREE, false); + And and = new And(EMPTY, neq, range); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(Range.class, exp.getClass()); + Range r = (Range) exp; + assertEquals(TWO, r.lower()); + assertFalse(r.includeLower()); + assertEquals(THREE, r.upper()); + assertFalse(r.includeUpper()); + } + + // a != 3 AND 2 < a <= 3 -> 2 < a < 3 + public void testCombineBinaryComparisonsConjunction_Neq3AndRangeGt2Lte3() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, THREE); + Range range = new Range(EMPTY, fa, TWO, false, THREE, true); + And and = new And(EMPTY, neq, range); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(Range.class, exp.getClass()); + Range r = (Range) exp; + assertEquals(TWO, r.lower()); + assertFalse(r.includeLower()); + assertEquals(THREE, r.upper()); + assertFalse(r.includeUpper()); + } + + // a != 2 AND 1 < a < 3 + public void testCombineBinaryComparisonsConjunction_Neq2AndRangeGt1Lt3() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + Range range = new Range(EMPTY, fa, ONE, false, THREE, false); + And and = new And(EMPTY, neq, range); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(And.class, exp.getClass()); // can't optimize + } + + // a != 2 AND a > 3 -> a > 3 + public void testCombineBinaryComparisonsConjunction_Neq2AndGt3() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + GreaterThan gt = new GreaterThan(EMPTY, fa, THREE); + And and = new And(EMPTY, neq, gt); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(gt, exp); + } + + // a != 2 AND a >= 2 -> a > 2 + public void testCombineBinaryComparisonsConjunction_Neq2AndGte2() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + GreaterThanOrEqual gte = new GreaterThanOrEqual(EMPTY, fa, TWO); + And and = new And(EMPTY, neq, gte); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(GreaterThan.class, exp.getClass()); + GreaterThan gt = (GreaterThan) exp; + assertEquals(TWO, gt.right()); + } + + // a != 2 AND a >= 1 -> nop + public void testCombineBinaryComparisonsConjunction_Neq2AndGte1() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + GreaterThanOrEqual gte = new GreaterThanOrEqual(EMPTY, fa, ONE); + And and = new And(EMPTY, neq, gte); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(And.class, exp.getClass()); // can't optimize + } + + // a != 2 AND a <= 3 -> nop + public void testCombineBinaryComparisonsConjunction_Neq2AndLte3() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + LessThanOrEqual lte = new LessThanOrEqual(EMPTY, fa, THREE); + And and = new And(EMPTY, neq, lte); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(and, exp); // can't optimize + } + + // a != 2 AND a <= 2 -> a < 2 + public void testCombineBinaryComparisonsConjunction_Neq2AndLte2() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + LessThanOrEqual lte = new LessThanOrEqual(EMPTY, fa, TWO); + And and = new And(EMPTY, neq, lte); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(LessThan.class, exp.getClass()); + LessThan lt = (LessThan) exp; + assertEquals(TWO, lt.right()); + } + + // a != 2 AND a <= 1 -> a <= 1 + public void testCombineBinaryComparisonsConjunction_Neq2AndLte1() { + FieldAttribute fa = getFieldAttribute(); + + NotEquals neq = new NotEquals(EMPTY, fa, TWO); + LessThanOrEqual lte = new LessThanOrEqual(EMPTY, fa, ONE); + And and = new And(EMPTY, neq, lte); + + CombineBinaryComparisons rule = new CombineBinaryComparisons(); + Expression exp = rule.rule(and); + assertEquals(lte, exp); + } + // Disjunction public void testCombineBinaryComparisonsDisjunctionNotComparable() {