diff --git a/modules/lang-painless/src/main/antlr/PainlessLexer.g4 b/modules/lang-painless/src/main/antlr/PainlessLexer.g4
index 18fdae751af..2d5af7c483a 100644
--- a/modules/lang-painless/src/main/antlr/PainlessLexer.g4
+++ b/modules/lang-painless/src/main/antlr/PainlessLexer.g4
@@ -20,7 +20,15 @@
lexer grammar PainlessLexer;
@header {
-import org.elasticsearch.painless.Definition;
+}
+
+@members{
+ protected boolean isSimpleType(String name) { // hook for the TYPE rule predicate
+ throw new UnsupportedOperationException("Must be implemented in a subclass");
+ }
+ protected boolean slashIsRegex() { // hook for the DIV and REGEX rule predicates
+ throw new UnsupportedOperationException("Must be implemented in a subclass");
+ }
}
WS: [ \t\n\r]+ -> skip;
@@ -59,7 +67,7 @@ INSTANCEOF: 'instanceof';
BOOLNOT: '!';
BWNOT: '~';
MUL: '*';
-DIV: '/' { false == SlashStrategy.slashIsRegex(this) }?;
+DIV: '/' { false == slashIsRegex() }?;
REM: '%';
ADD: '+';
SUB: '-';
@@ -108,7 +116,7 @@ INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?;
DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?;
STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' );
-REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { SlashStrategy.slashIsRegex(this) }?;
+REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { slashIsRegex() }?;
TRUE: 'true';
FALSE: 'false';
@@ -121,7 +129,7 @@ NULL: 'null';
// or not. Note this works by processing one character at a time
// and the rule is added or removed as this happens. This is also known
// as "the lexer hack." See (https://en.wikipedia.org/wiki/The_lexer_hack).
-TYPE: ID ( DOT ID )* { Definition.isSimpleType(getText()) }?;
+TYPE: ID ( DOT ID )* { isSimpleType(getText()) }?;
ID: [_a-zA-Z] [_a-zA-Z0-9]*;
mode AFTER_DOT;
diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java
index 244c2f38e62..640d9c29b20 100644
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/EnhancedPainlessLexer.java
@@ -26,13 +26,15 @@ import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenSource;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.Pair;
+import org.elasticsearch.painless.Definition;
import org.elasticsearch.painless.Location;
/**
* A lexer that is customized for painless. It:
* <ul>
- * <li>Overrides the default error behavior to fail on the first error
- * <li>Stores the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection
+ * <li>Overrides the default error behavior to fail on the first error.
+ * <li>Stores the last token in case we need to do lookbehind for semicolon insertion and regex vs division detection.
+ * <li>Implements the regex vs division detection.
* <li>Insert semicolons where they'd improve the language's readability. Rather than hack this into the parser and create a ton of
* ambiguity we hack them here where we can use heuristics to do it quickly.
* <li>Enhances the error message when a string contains invalid escape sequences to include a list of valid escape sequences.
@@ -89,6 +91,33 @@ final class EnhancedPainlessLexer extends PainlessLexer {
throw location.createError(new IllegalArgumentException(message, lnvae));
}
+ @Override
+ protected boolean isSimpleType(String name) { // implements the hook used by the TYPE rule predicate
+ return Definition.isSimpleType(name); // lookup stays here so the generated lexer does not import Definition
+ }
+
+ @Override
+ protected boolean slashIsRegex() { // implements the hook used by the DIV and REGEX rule predicates
+ Token lastToken = getPreviousToken(); // decision is a lookbehind on the previously stored token
+ if (lastToken == null) { // no previous token recorded: treat the slash as the start of a regex
+ return true;
+ }
+ switch (lastToken.getType()) {
+ case PainlessLexer.RBRACE:
+ case PainlessLexer.RP:
+ case PainlessLexer.OCTAL:
+ case PainlessLexer.HEX:
+ case PainlessLexer.INTEGER:
+ case PainlessLexer.DECIMAL:
+ case PainlessLexer.ID:
+ case PainlessLexer.DOTINTEGER:
+ case PainlessLexer.DOTID:
+ return false; // after any token type listed above the DIV predicate passes and REGEX is rejected
+ default:
+ return true; // after every other token type the REGEX predicate passes and DIV is rejected
+ }
+ }
+
private static boolean insertSemicolon(Token previous, Token next) {
if (previous == null || next.getType() != PainlessLexer.RBRACK) {
return false;
diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java
index 900180ec106..32d441af4fa 100644
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java
+++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java
@@ -1,7 +1,6 @@
// ANTLR GENERATED CODE: DO NOT EDIT
package org.elasticsearch.painless.antlr;
-import org.elasticsearch.painless.Definition;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.CharStream;
@@ -106,6 +105,14 @@ class PainlessLexer extends Lexer {
}
+ protected boolean isSimpleType(String name) { // hook for the TYPE rule predicate
+ throw new UnsupportedOperationException("Must be implemented in a subclass");
+ }
+ protected boolean slashIsRegex() { // hook for the DIV and REGEX rule predicates
+ throw new UnsupportedOperationException("Must be implemented in a subclass");
+ }
+
+
public PainlessLexer(CharStream input) {
super(input);
_interp = new LexerATNSimulator(this,_ATN,_decisionToDFA,_sharedContextCache);
@@ -141,21 +148,21 @@ class PainlessLexer extends Lexer {
private boolean DIV_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) {
case 0:
- return false == SlashStrategy.slashIsRegex(this) ;
+ return false == slashIsRegex() ;
}
return true;
}
private boolean REGEX_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) {
case 1:
- return SlashStrategy.slashIsRegex(this) ;
+ return slashIsRegex() ;
}
return true;
}
private boolean TYPE_sempred(RuleContext _localctx, int predIndex) {
switch (predIndex) {
case 2:
- return Definition.isSimpleType(getText()) ;
+ return isSimpleType(getText()) ;
}
return true;
}
diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/SlashStrategy.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/SlashStrategy.java
deleted file mode 100644
index 698a9dfc364..00000000000
--- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/SlashStrategy.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.painless.antlr;
-
-import org.antlr.v4.runtime.Token;
-
-/**
- * Utility to figure out if a {@code /} is division or the start of a regex literal.
- */
-public class SlashStrategy {
- public static boolean slashIsRegex(PainlessLexer lexer) {
- EnhancedPainlessLexer realLexer = (EnhancedPainlessLexer) lexer;
- Token lastToken = realLexer.getPreviousToken();
- if (lastToken == null) {
- return true;
- }
- switch (lastToken.getType()) {
- case PainlessLexer.RBRACE:
- case PainlessLexer.RP:
- case PainlessLexer.OCTAL:
- case PainlessLexer.HEX:
- case PainlessLexer.INTEGER:
- case PainlessLexer.DECIMAL:
- case PainlessLexer.ID:
- case PainlessLexer.DOTINTEGER:
- case PainlessLexer.DOTID:
- return false;
- default:
- return true;
- }
- }
-}