Move lexer hacks to EnhancedPainlessLexer

This "feels" nicer. Fewer classes, at least.
This commit is contained in:
Nik Everett 2017-01-19 11:23:16 -05:00
parent e2da6a8ee5
commit dbb4a2ca6c
4 changed files with 54 additions and 59 deletions

View File

@ -20,7 +20,15 @@
lexer grammar PainlessLexer; lexer grammar PainlessLexer;
@header { @header {
import org.elasticsearch.painless.Definition; }
@members{
protected boolean isSimpleType(String name) {
throw new UnsupportedOperationException("Must be implemented in a subclass");
}
protected boolean slashIsRegex() {
throw new UnsupportedOperationException("Must be implemented in a subclass");
}
} }
WS: [ \t\n\r]+ -> skip; WS: [ \t\n\r]+ -> skip;
@ -59,7 +67,7 @@ INSTANCEOF: 'instanceof';
BOOLNOT: '!'; BOOLNOT: '!';
BWNOT: '~'; BWNOT: '~';
MUL: '*'; MUL: '*';
DIV: '/' { false == SlashStrategy.slashIsRegex(this) }?; DIV: '/' { false == slashIsRegex() }?;
REM: '%'; REM: '%';
ADD: '+'; ADD: '+';
SUB: '-'; SUB: '-';
@ -108,7 +116,7 @@ INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?;
DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?; DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?;
STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' ); STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' );
REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { SlashStrategy.slashIsRegex(this) }?; REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { slashIsRegex() }?;
TRUE: 'true'; TRUE: 'true';
FALSE: 'false'; FALSE: 'false';
@ -121,7 +129,7 @@ NULL: 'null';
// or not. Note this works by processing one character at a time // or not. Note this works by processing one character at a time
// and the rule is added or removed as this happens. This is also known // and the rule is added or removed as this happens. This is also known
// as "the lexer hack." See (https://en.wikipedia.org/wiki/The_lexer_hack). // as "the lexer hack." See (https://en.wikipedia.org/wiki/The_lexer_hack).
TYPE: ID ( DOT ID )* { Definition.isSimpleType(getText()) }?; TYPE: ID ( DOT ID )* { isSimpleType(getText()) }?;
ID: [_a-zA-Z] [_a-zA-Z0-9]*; ID: [_a-zA-Z] [_a-zA-Z0-9]*;
mode AFTER_DOT; mode AFTER_DOT;

View File

@ -26,13 +26,15 @@ import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource; import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.misc.Interval; import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair; import org.antlr.v4.runtime.misc.Pair;
import org.elasticsearch.painless.Definition;
import org.elasticsearch.painless.Location; import org.elasticsearch.painless.Location;
/** /**
* A lexer that is customized for painless. It: * A lexer that is customized for painless. It:
* <ul> * <ul>
* <li>Overrides the default error behavior to fail on the first error * <li>Overrides the default error behavior to fail on the first error.
* <li>Stores the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection * <li>Stores the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection.
* <li>Implements the regex vs division detection.
* <li>Insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of * <li>Insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of
* ambiguity we hack them here where we can use heuristics to do it quickly. * ambiguity we hack them here where we can use heuristics to do it quickly.
* <li>Enhances the error message when a string contains invalid escape sequences to include a list of valid escape sequences. * <li>Enhances the error message when a string contains invalid escape sequences to include a list of valid escape sequences.
@ -89,6 +91,33 @@ final class EnhancedPainlessLexer extends PainlessLexer {
throw location.createError(new IllegalArgumentException(message, lnvae)); throw location.createError(new IllegalArgumentException(message, lnvae));
} }
@Override
// Wires the grammar's TYPE-vs-ID semantic predicate ("the lexer hack")
// to the painless Definition whitelist of known simple type names.
protected boolean isSimpleType(String name) {
return Definition.isSimpleType(name);
}
@Override
protected boolean slashIsRegex() {
    Token previous = getPreviousToken();
    if (previous == null) {
        // Nothing precedes the slash, so it can only open a regex literal.
        return true;
    }
    // A slash immediately after a token that can end an expression is
    // division; after anything else it starts a regex literal.
    int type = previous.getType();
    return type != PainlessLexer.RBRACE
        && type != PainlessLexer.RP
        && type != PainlessLexer.OCTAL
        && type != PainlessLexer.HEX
        && type != PainlessLexer.INTEGER
        && type != PainlessLexer.DECIMAL
        && type != PainlessLexer.ID
        && type != PainlessLexer.DOTINTEGER
        && type != PainlessLexer.DOTID;
}
private static boolean insertSemicolon(Token previous, Token next) { private static boolean insertSemicolon(Token previous, Token next) {
if (previous == null || next.getType() != PainlessLexer.RBRACK) { if (previous == null || next.getType() != PainlessLexer.RBRACK) {
return false; return false;

View File

@ -1,7 +1,6 @@
// ANTLR GENERATED CODE: DO NOT EDIT // ANTLR GENERATED CODE: DO NOT EDIT
package org.elasticsearch.painless.antlr; package org.elasticsearch.painless.antlr;
import org.elasticsearch.painless.Definition;
import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.CharStream;
@ -106,6 +105,14 @@ class PainlessLexer extends Lexer {
} }
// Hook for the TYPE rule's semantic predicate; a subclass
// (EnhancedPainlessLexer in this commit) overrides it with the real check.
protected boolean isSimpleType(String name) {
throw new UnsupportedOperationException("Must be implemented in a subclass");
}
// Hook for the DIV/REGEX semantic predicates; a subclass
// (EnhancedPainlessLexer in this commit) overrides it with the lookbehind logic.
protected boolean slashIsRegex() {
throw new UnsupportedOperationException("Must be implemented in a subclass");
}
public PainlessLexer(CharStream input) { public PainlessLexer(CharStream input) {
super(input); super(input);
_interp = new LexerATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache); _interp = new LexerATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache);
@ -141,21 +148,21 @@ class PainlessLexer extends Lexer {
private boolean DIV_sempred(RuleContext _localctx, int predIndex) { private boolean DIV_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) { switch (predIndex) {
case 0: case 0:
return false == SlashStrategy.slashIsRegex(this) ; return false == slashIsRegex() ;
} }
return true; return true;
} }
private boolean REGEX_sempred(RuleContext _localctx, int predIndex) { private boolean REGEX_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) { switch (predIndex) {
case 1: case 1:
return SlashStrategy.slashIsRegex(this) ; return slashIsRegex() ;
} }
return true; return true;
} }
private boolean TYPE_sempred(RuleContext _localctx, int predIndex) { private boolean TYPE_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) { switch (predIndex) {
case 2: case 2:
return Definition.isSimpleType(getText()) ; return isSimpleType(getText()) ;
} }
return true; return true;
} }

View File

@ -1,49 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.painless.antlr;
import org.antlr.v4.runtime.Token;
/**
* Utility to figure out if a {@code /} is division or the start of a regex literal.
*/
public class SlashStrategy {
    /** Static-utility holder; never instantiated. */
    private SlashStrategy() {}

    /**
     * Decides whether a {@code /} should lex as division or as the start of a
     * regex literal by looking at the previously emitted token.
     *
     * @param lexer the running lexer; must actually be an
     *        {@code EnhancedPainlessLexer} so the previous token is available
     * @return {@code true} when the slash starts a regex literal,
     *         {@code false} when it is the division operator
     */
    public static boolean slashIsRegex(PainlessLexer lexer) {
        EnhancedPainlessLexer realLexer = (EnhancedPainlessLexer) lexer;
        Token lastToken = realLexer.getPreviousToken();
        if (lastToken == null) {
            // Nothing precedes the slash, so it can only open a regex literal.
            return true;
        }
        switch (lastToken.getType()) {
        // Tokens that can end an expression mean the slash is division.
        case PainlessLexer.RBRACE:
        case PainlessLexer.RP:
        case PainlessLexer.OCTAL:
        case PainlessLexer.HEX:
        case PainlessLexer.INTEGER:
        case PainlessLexer.DECIMAL:
        case PainlessLexer.ID:
        case PainlessLexer.DOTINTEGER:
        case PainlessLexer.DOTID:
            return false;
        default:
            return true;
        }
    }
}