diff --git a/modules/lang-painless/src/main/antlr/PainlessLexer.g4 b/modules/lang-painless/src/main/antlr/PainlessLexer.g4 index f60d48efcc7..6ab6a861135 100644 --- a/modules/lang-painless/src/main/antlr/PainlessLexer.g4 +++ b/modules/lang-painless/src/main/antlr/PainlessLexer.g4 @@ -120,7 +120,7 @@ INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?; DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?; STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' ); -REGEX: '/' ( ~('/' | '\n') | '\\' ~'\n' )+ '/' [cilmsUux]* { slashIsRegex() }?; +REGEX: '/' ( '\\' ~'\n' | ~('/' | '\n') )+? '/' [cilmsUux]* { slashIsRegex() }?; TRUE: 'true'; FALSE: 'false'; diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java index 44972061b59..fd32c59b4ff 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/PainlessLexer.java @@ -1,7 +1,5 @@ // ANTLR GENERATED CODE: DO NOT EDIT package org.elasticsearch.painless.antlr; - - import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.CharStream; import org.antlr.v4.runtime.Token; @@ -211,29 +209,29 @@ abstract class PainlessLexer extends Lexer { "\3N\3N\7N\u021a\nN\fN\16N\u021d\13N\3N\3N\3O\3O\3O\3O\3O\3P\3P\3P\3P\3"+ "P\3P\3Q\3Q\3Q\3Q\3Q\3R\3R\3R\3R\7R\u0235\nR\fR\16R\u0238\13R\3R\3R\3S"+ "\3S\7S\u023e\nS\fS\16S\u0241\13S\3T\3T\3T\7T\u0246\nT\fT\16T\u0249\13"+ - "T\5T\u024b\nT\3T\3T\3U\3U\7U\u0251\nU\fU\16U\u0254\13U\3U\3U\6\u00b9\u00c3"+ - "\u01fd\u0209\2V\4\3\6\4\b\5\n\6\f\7\16\b\20\t\22\n\24\13\26\f\30\r\32"+ - "\16\34\17\36\20 \21\"\22$\23&\24(\25*\26,\27.\30\60\31\62\32\64\33\66"+ - "\348\35:\36<\37> @!B\"D#F$H%J&L\'N(P)R*T+V,X-Z.\\/^\60`\61b\62d\63f\64"+ - "h\65j\66l\67n8p9r:t;v|?~@\u0080A\u0082B\u0084C\u0086D\u0088E\u008a"+ - "F\u008cG\u008eH\u0090I\u0092J\u0094K\u0096L\u0098M\u009aN\u009cO\u009e"+ - "P\u00a0Q\u00a2R\u00a4S\u00a6T\u00a8U\u00aaV\4\2\3\25\5\2\13\f\17\17\""+ - "\"\4\2\f\f\17\17\3\2\629\4\2NNnn\4\2ZZzz\5\2\62;CHch\3\2\63;\3\2\62;\b"+ - "\2FFHHNNffhhnn\4\2GGgg\4\2--//\6\2FFHHffhh\4\2$$^^\4\2))^^\4\2\f\f\61"+ - "\61\3\2\f\f\t\2WWeekknouuwwzz\5\2C\\aac|\6\2\62;C\\aac|\u0277\2\4\3\2"+ - "\2\2\2\6\3\2\2\2\2\b\3\2\2\2\2\n\3\2\2\2\2\f\3\2\2\2\2\16\3\2\2\2\2\20"+ - "\3\2\2\2\2\22\3\2\2\2\2\24\3\2\2\2\2\26\3\2\2\2\2\30\3\2\2\2\2\32\3\2"+ - "\2\2\2\34\3\2\2\2\2\36\3\2\2\2\2 \3\2\2\2\2\"\3\2\2\2\2$\3\2\2\2\2&\3"+ - "\2\2\2\2(\3\2\2\2\2*\3\2\2\2\2,\3\2\2\2\2.\3\2\2\2\2\60\3\2\2\2\2\62\3"+ - "\2\2\2\2\64\3\2\2\2\2\66\3\2\2\2\28\3\2\2\2\2:\3\2\2\2\2<\3\2\2\2\2>\3"+ - "\2\2\2\2@\3\2\2\2\2B\3\2\2\2\2D\3\2\2\2\2F\3\2\2\2\2H\3\2\2\2\2J\3\2\2"+ - "\2\2L\3\2\2\2\2N\3\2\2\2\2P\3\2\2\2\2R\3\2\2\2\2T\3\2\2\2\2V\3\2\2\2\2"+ - "X\3\2\2\2\2Z\3\2\2\2\2\\\3\2\2\2\2^\3\2\2\2\2`\3\2\2\2\2b\3\2\2\2\2d\3"+ - "\2\2\2\2f\3\2\2\2\2h\3\2\2\2\2j\3\2\2\2\2l\3\2\2\2\2n\3\2\2\2\2p\3\2\2"+ - "\2\2r\3\2\2\2\2t\3\2\2\2\2v\3\2\2\2\2x\3\2\2\2\2z\3\2\2\2\2|\3\2\2\2\2"+ - "~\3\2\2\2\2\u0080\3\2\2\2\2\u0082\3\2\2\2\2\u0084\3\2\2\2\2\u0086\3\2"+ - "\2\2\2\u0088\3\2\2\2\2\u008a\3\2\2\2\2\u008c\3\2\2\2\2\u008e\3\2\2\2\2"+ - "\u0090\3\2\2\2\2\u0092\3\2\2\2\2\u0094\3\2\2\2\2\u0096\3\2\2\2\2\u0098"+ + "T\5T\u024b\nT\3T\3T\3U\3U\7U\u0251\nU\fU\16U\u0254\13U\3U\3U\7\u00b9\u00c3"+ + "\u01fd\u0209\u0215\2V\4\3\6\4\b\5\n\6\f\7\16\b\20\t\22\n\24\13\26\f\30"+ + "\r\32\16\34\17\36\20 \21\"\22$\23&\24(\25*\26,\27.\30\60\31\62\32\64\33"+ + "\66\348\35:\36<\37> @!B\"D#F$H%J&L\'N(P)R*T+V,X-Z.\\/^\60`\61b\62d\63"+ + "f\64h\65j\66l\67n8p9r:t;v|?~@\u0080A\u0082B\u0084C\u0086D\u0088E"+ + "\u008aF\u008cG\u008eH\u0090I\u0092J\u0094K\u0096L\u0098M\u009aN\u009c"+ + "O\u009eP\u00a0Q\u00a2R\u00a4S\u00a6T\u00a8U\u00aaV\4\2\3\25\5\2\13\f\17"+ + "\17\"\"\4\2\f\f\17\17\3\2\629\4\2NNnn\4\2ZZzz\5\2\62;CHch\3\2\63;\3\2"+ + "\62;\b\2FFHHNNffhhnn\4\2GGgg\4\2--//\6\2FFHHffhh\4\2$$^^\4\2))^^\3\2\f"+ + "\f\4\2\f\f\61\61\t\2WWeekknouuwwzz\5\2C\\aac|\6\2\62;C\\aac|\u0277\2\4"+ + "\3\2\2\2\2\6\3\2\2\2\2\b\3\2\2\2\2\n\3\2\2\2\2\f\3\2\2\2\2\16\3\2\2\2"+ + "\2\20\3\2\2\2\2\22\3\2\2\2\2\24\3\2\2\2\2\26\3\2\2\2\2\30\3\2\2\2\2\32"+ + "\3\2\2\2\2\34\3\2\2\2\2\36\3\2\2\2\2 \3\2\2\2\2\"\3\2\2\2\2$\3\2\2\2\2"+ + "&\3\2\2\2\2(\3\2\2\2\2*\3\2\2\2\2,\3\2\2\2\2.\3\2\2\2\2\60\3\2\2\2\2\62"+ + "\3\2\2\2\2\64\3\2\2\2\2\66\3\2\2\2\28\3\2\2\2\2:\3\2\2\2\2<\3\2\2\2\2"+ + ">\3\2\2\2\2@\3\2\2\2\2B\3\2\2\2\2D\3\2\2\2\2F\3\2\2\2\2H\3\2\2\2\2J\3"+ + "\2\2\2\2L\3\2\2\2\2N\3\2\2\2\2P\3\2\2\2\2R\3\2\2\2\2T\3\2\2\2\2V\3\2\2"+ + "\2\2X\3\2\2\2\2Z\3\2\2\2\2\\\3\2\2\2\2^\3\2\2\2\2`\3\2\2\2\2b\3\2\2\2"+ + "\2d\3\2\2\2\2f\3\2\2\2\2h\3\2\2\2\2j\3\2\2\2\2l\3\2\2\2\2n\3\2\2\2\2p"+ + "\3\2\2\2\2r\3\2\2\2\2t\3\2\2\2\2v\3\2\2\2\2x\3\2\2\2\2z\3\2\2\2\2|\3\2"+ + "\2\2\2~\3\2\2\2\2\u0080\3\2\2\2\2\u0082\3\2\2\2\2\u0084\3\2\2\2\2\u0086"+ + "\3\2\2\2\2\u0088\3\2\2\2\2\u008a\3\2\2\2\2\u008c\3\2\2\2\2\u008e\3\2\2"+ + "\2\2\u0090\3\2\2\2\2\u0092\3\2\2\2\2\u0094\3\2\2\2\2\u0096\3\2\2\2\2\u0098"+ "\3\2\2\2\2\u009a\3\2\2\2\2\u009c\3\2\2\2\2\u009e\3\2\2\2\2\u00a0\3\2\2"+ "\2\2\u00a2\3\2\2\2\2\u00a4\3\2\2\2\2\u00a6\3\2\2\2\3\u00a8\3\2\2\2\3\u00aa"+ "\3\2\2\2\4\u00ad\3\2\2\2\6\u00c8\3\2\2\2\b\u00cc\3\2\2\2\n\u00ce\3\2\2"+ @@ -358,9 +356,9 @@ abstract class PainlessLexer extends Lexer { "\3\2\2\2\u0207\u0206\3\2\2\2\u0208\u020b\3\2\2\2\u0209\u020a\3\2\2\2\u0209"+ "\u0207\3\2\2\2\u020a\u020c\3\2\2\2\u020b\u0209\3\2\2\2\u020c\u020e\7)"+ "\2\2\u020d\u01f5\3\2\2\2\u020d\u0201\3\2\2\2\u020e\u009b\3\2\2\2\u020f"+ - "\u0213\7\61\2\2\u0210\u0214\n\20\2\2\u0211\u0212\7^\2\2\u0212\u0214\n"+ - "\21\2\2\u0213\u0210\3\2\2\2\u0213\u0211\3\2\2\2\u0214\u0215\3\2\2\2\u0215"+ - "\u0213\3\2\2\2\u0215\u0216\3\2\2\2\u0216\u0217\3\2\2\2\u0217\u021b\7\61"+ + "\u0213\7\61\2\2\u0210\u0211\7^\2\2\u0211\u0214\n\20\2\2\u0212\u0214\n"+ + "\21\2\2\u0213\u0210\3\2\2\2\u0213\u0212\3\2\2\2\u0214\u0215\3\2\2\2\u0215"+ + "\u0216\3\2\2\2\u0215\u0213\3\2\2\2\u0216\u0217\3\2\2\2\u0217\u021b\7\61"+ "\2\2\u0218\u021a\t\22\2\2\u0219\u0218\3\2\2\2\u021a\u021d\3\2\2\2\u021b"+ "\u0219\3\2\2\2\u021b\u021c\3\2\2\2\u021c\u021e\3\2\2\2\u021d\u021b\3\2"+ "\2\2\u021e\u021f\6N\3\2\u021f\u009d\3\2\2\2\u0220\u0221\7v\2\2\u0221\u0222"+ diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/ERegex.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/ERegex.java index 1d1f41948c4..4b38868b1b1 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/ERegex.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/ERegex.java @@ -68,8 +68,9 @@ public final class ERegex extends AExpression { try { Pattern.compile(pattern, flags); - } catch (PatternSyntaxException exception) { - throw createError(exception); + } catch (PatternSyntaxException e) { + throw new Location(location.getSourceName(), location.getOffset() + 1 + e.getIndex()).createError( + new IllegalArgumentException("Error compiling regex: " + e.getDescription())); } constant = new Constant(location, Definition.PATTERN_TYPE.type, "regexAt$" + location.getOffset(), this::initializeConstant); diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java index 1c53692ad74..83a592b3f26 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.painless; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.script.ScriptException; import java.nio.CharBuffer; import java.util.Arrays; @@ -44,8 +45,17 @@ public class RegexTests extends ScriptTestCase { assertEquals(false, exec("return 'bar' ==~ /foo/")); } - public void testSlashesEscapePattern() { - assertEquals(true, exec("return '//' ==~ /\\/\\//")); + public void testBackslashEscapesForwardSlash() { + assertEquals(true, exec("'//' ==~ /\\/\\//")); + } + + public void testBackslashEscapeBackslash() { + // Both of these are single backslashes but java escaping + Painless escaping.... + assertEquals(true, exec("'\\\\' ==~ /\\\\/")); + } + + public void testRegexIsNonGreedy() { + assertEquals(true, exec("def s = /\\\\/.split('.\\\\.'); return s[1] ==~ /\\./")); } public void testPatternAfterAssignment() { @@ -248,11 +258,14 @@ public class RegexTests extends ScriptTestCase { } public void testBadRegexPattern() { - PatternSyntaxException e = expectScriptThrows(PatternSyntaxException.class, () -> { + ScriptException e = expectThrows(ScriptException.class, () -> { exec("/\\ujjjj/"); // Invalid unicode }); - assertThat(e.getMessage(), containsString("Illegal Unicode escape sequence near index 2")); - assertThat(e.getMessage(), containsString("\\ujjjj")); + assertEquals("Error compiling regex: Illegal Unicode escape sequence", e.getCause().getMessage()); + + // And make sure the location of the error points to the offset inside the pattern + assertEquals("/\\ujjjj/", e.getScriptStack().get(0)); + assertEquals(" ^---- HERE", e.getScriptStack().get(1)); } public void testRegexAgainstNumber() {