Painless: Add } as a delimiter. Kindof.

Add `}` is statement delimiter but only in places where it is otherwise a valid part of the syntax, specificall the end of a block. We do this by matching but not consuming it. Antlr 4 doesn't have syntax for this so we have to kind of hack it together by actually matching the `}` and then seeking backwards in the token stream to "unmatch" it. This looks reasonably efficient. Not perfect, but way better than the alternatives. I tried and rejected a few options: 1. Actually consuming the `}` and piping a boolean all through the grammar from the last statement in a block to the delimiter. This ended up being a rather large change and made the grammar way more complicated. 2. Adding a semantic predicate to delimiter that just does the lookahead. This doesn't work out well because it doesn't work (I never figured out why) and because it generates an *amazing* `adaptivePredict` which makes a super huge DFA. It looks super inefficient. Closes #18821
2016-06-11 10:15:14 -04:00 · 2016-06-11 10:15:14 -04:00 · 4a265d0279
parent c3bf3b34c2
commit 4a265d0279
4 changed files with 390 additions and 336 deletions
--- a/modules/lang-painless/src/main/antlr/PainlessParser.g4
+++ b/modules/lang-painless/src/main/antlr/PainlessParser.g4
@ -92,6 +92,17 @@ trap
 delimiter
    : SEMICOLON
    | EOF
+    // RBRACK is a delimiter but we don't consume it because it is only valid
+    // in places where RBRACK can follow the statement. It is simpler to not
+    // consume it here then it is to consume it here. Unfortunately, they
+    // obvious syntax to do this `| { _input.LA(1) == RBRACK }?` generates an
+    // amazingly intense `adaptivePredict` call that doesn't actually work
+    // and builds a serious DFA. Huge. So instead we use standard ANTLR syntax
+    // to consume the token and then undo the consumption. This looks hairy but
+    // it is better than the alternatives.
+    |   { int mark = _input.mark(); int index = _input.index(); }
+            RBRACK
+        { _input.seek(index); _input.release(mark); }
    ;

 // Note we return the boolean s.  This is returned as true
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessParser.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessParser.java
--- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/BasicStatementTests.java
+++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/BasicStatementTests.java
@ -221,4 +221,13 @@ public class BasicStatementTests extends ScriptTestCase {
        });
        assertTrue(exception.getCause().getMessage().contains("Lambda functions are not supported."));
    }
+
+    public void testLastInBlockDoesntNeedSemi() {
+        // One statement in the block in case that is a special case
+        assertEquals(10, exec("def i = 1; if (i == 1) {return 10}"));
+        assertEquals(10, exec("def i = 1; if (i == 1) {return 10} else {return 12}"));
+        // Two statements in the block, in case that is the general case
+        assertEquals(10, exec("def i = 1; if (i == 1) {i = 2; return 10}"));
+        assertEquals(10, exec("def i = 1; if (i == 1) {i = 2; return 10} else {return 12}"));
+    }
 }
--- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/WhenThingsGoWrongTests.java
+++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/WhenThingsGoWrongTests.java
@ -23,6 +23,8 @@ import java.lang.invoke.WrongMethodTypeException;
 import java.util.Arrays;
 import java.util.Collections;

+import static java.util.Collections.emptyMap;
+
 public class WhenThingsGoWrongTests extends ScriptTestCase {
    public void testNullPointer() {
        expectScriptThrows(NullPointerException.class, () -> {
@ -197,4 +199,16 @@ public class WhenThingsGoWrongTests extends ScriptTestCase {
            exec("def x = new ArrayList(); x.add('foo'); return x['bogus'];");
        });
    }
+
+    /**
+     * Makes sure that we present a useful error message with a misplaced right-curly. This is important because we do some funky things in
+     * the parser with right-curly brackets to allow statements to be delimited by them at the end of blocks.
+     */
+    public void testRCurlyNotDelim() {
+        IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> {
+            // We don't want PICKY here so we get the normal error message
+            exec("def i = 1} return 1", emptyMap(), emptyMap(), null);
+        });
+        assertEquals("invalid sequence of tokens near ['}'].", e.getMessage());
+    }
 }