diff --git a/docs/reference/modules/scripting/painless.asciidoc b/docs/reference/modules/scripting/painless.asciidoc index db68db2c7a3..93fb136913b 100644 --- a/docs/reference/modules/scripting/painless.asciidoc +++ b/docs/reference/modules/scripting/painless.asciidoc @@ -237,8 +237,8 @@ POST hockey/player/_update_by_query ---------------------------------------------------------------- // CONSOLE -Or you can use the `Pattern.matcher` directory to get a `Matcher` instance and -remove all of the vowels in all of their names: +You can use the `Pattern.matcher` directly to get a `Matcher` instance and +remove all of the vowels in all of their last names: [source,js] ---------------------------------------------------------------- @@ -252,6 +252,59 @@ POST hockey/player/_update_by_query ---------------------------------------------------------------- // CONSOLE +`Matcher.replaceAll` is just a call to Java's `Matcher`'s +http://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#replaceAll-java.lang.String-[replaceAll] +method so it supports `$1` and `\1` for replacements: + +[source,js] +---------------------------------------------------------------- +POST hockey/player/_update_by_query +{ + "script": { + "lang": "painless", + "inline": "ctx._source.last = /n([aeiou])/.matcher(ctx._source.last).replaceAll('$1')" + } +} +---------------------------------------------------------------- +// CONSOLE + +If you need more control over replacements you can call `replaceAll` on a +`CharSequence` with a `Function` that builds the replacement. +This does not support `$1` or `\1` to access replacements because you already +have a reference to the matcher and can get them with `m.group(1)`. + +IMPORTANT: Calling `Matcher.find` inside of the function that builds the +replacement is rude and will likely break the replacement process. 
+ +This will make all of the vowels in the hockey players' last names upper case: + +[source,js] +---------------------------------------------------------------- +POST hockey/player/_update_by_query +{ + "script": { + "lang": "painless", + "inline": "ctx._source.last = ctx._source.last.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))" + } +} +---------------------------------------------------------------- +// CONSOLE + +Or you can use the `CharSequence.replaceFirst` to make the first vowel in their +last names upper case: + +[source,js] +---------------------------------------------------------------- +POST hockey/player/_update_by_query +{ + "script": { + "lang": "painless", + "inline": "ctx._source.last = ctx._source.last.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))" + } +} +---------------------------------------------------------------- +// CONSOLE + Note: all of the `_update_by_query` examples above could really do with a `query` to limit the data that they pull back. While you *could* use a @@ -265,18 +318,18 @@ documents that they have to check. 
The following Java packages are available for use in the Painless language: -* https://docs.oracle.com/javase/8/docs/api/java/lang/package-summary.html[java.lang] -* https://docs.oracle.com/javase/8/docs/api/java/math/package-summary.html[java.math] -* https://docs.oracle.com/javase/8/docs/api/java/text/package-summary.html[java.text] -* https://docs.oracle.com/javase/8/docs/api/java/time/package-summary.html[java.time] -* https://docs.oracle.com/javase/8/docs/api/java/time/chrono/package-summary.html[java.time.chrono] -* https://docs.oracle.com/javase/8/docs/api/java/time/format/package-summary.html[java.time.format] -* https://docs.oracle.com/javase/8/docs/api/java/time/temporal/package-summary.html[java.time.temporal] -* https://docs.oracle.com/javase/8/docs/api/java/time/zone/package-summary.html[java.time.zone] -* https://docs.oracle.com/javase/8/docs/api/java/util/package-summary.html[java.util] -* https://docs.oracle.com/javase/8/docs/api/java/util/function/package-summary.html[java.util.function] -* https://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html[java.util.regex] -* https://docs.oracle.com/javase/8/docs/api/java/util/stream/package-summary.html[java.util.stream] +* https://docs.oracle.com/javase/8/docs/api/java/lang/package-summary.html[java.lang] +* https://docs.oracle.com/javase/8/docs/api/java/math/package-summary.html[java.math] +* https://docs.oracle.com/javase/8/docs/api/java/text/package-summary.html[java.text] +* https://docs.oracle.com/javase/8/docs/api/java/time/package-summary.html[java.time] +* https://docs.oracle.com/javase/8/docs/api/java/time/chrono/package-summary.html[java.time.chrono] +* https://docs.oracle.com/javase/8/docs/api/java/time/format/package-summary.html[java.time.format] +* https://docs.oracle.com/javase/8/docs/api/java/time/temporal/package-summary.html[java.time.temporal] +* https://docs.oracle.com/javase/8/docs/api/java/time/zone/package-summary.html[java.time.zone] +* 
https://docs.oracle.com/javase/8/docs/api/java/util/package-summary.html[java.util] +* https://docs.oracle.com/javase/8/docs/api/java/util/function/package-summary.html[java.util.function] +* https://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html[java.util.regex] +* https://docs.oracle.com/javase/8/docs/api/java/util/stream/package-summary.html[java.util.stream] Note that unsafe classes and methods are not included, there is no support for: diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/Augmentation.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/Augmentation.java index edd0eeab67f..9302f3c899c 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/Augmentation.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/Augmentation.java @@ -34,6 +34,7 @@ import java.util.function.ObjIntConsumer; import java.util.function.Predicate; import java.util.function.ToDoubleFunction; import java.util.regex.Matcher; +import java.util.regex.Pattern; /** Additional methods added to classes. These must be static methods with receiver as first argument */ public class Augmentation { @@ -442,4 +443,47 @@ public class Augmentation { } return map; } + + // CharSequence augmentation + /** + * Replace all matches. Similar to {@link Matcher#replaceAll(String)} but allows you to customize the replacement based on the match. 
+ */ + public static String replaceAll(CharSequence receiver, Pattern pattern, Function replacementBuilder) { + Matcher m = pattern.matcher(receiver); + if (false == m.find()) { + // CharSequence's toString is *supposed* to always return the characters in the sequence as a String + return receiver.toString(); + } + StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver)); + do { + m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m))); + } while (m.find()); + m.appendTail(result); + return result.toString(); + } + + /** + * Replace the first match. Similar to {@link Matcher#replaceFirst(String)} but allows you to customize the replacement based on the + * match. + */ + public static String replaceFirst(CharSequence receiver, Pattern pattern, Function replacementBuilder) { + Matcher m = pattern.matcher(receiver); + if (false == m.find()) { + // CharSequence's toString is *supposed* to always return the characters in the sequence as a String + return receiver.toString(); + } + StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver)); + m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m))); + m.appendTail(result); + return result.toString(); + } + + /** + * The initial size of the {@link StringBuffer} used for {@link #replaceFirst(CharSequence, Pattern, Function)} and + * {@link #replaceAll(CharSequence, Pattern, Function)} for a particular sequence. We ape + * {@link StringBuilder#StringBuilder(CharSequence)} here and add 16 extra chars to the buffer to have a little room for growth. 
+ */ + private static int initialBufferForReplaceWith(CharSequence seq) { + return seq.length() + 16; + } } diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/java.lang.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/java.lang.txt index 17016f1d318..13f28d3ebeb 100644 --- a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/java.lang.txt +++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/java.lang.txt @@ -36,6 +36,8 @@ class CharSequence -> java.lang.CharSequence { IntStream chars() IntStream codePoints() int length() + String replaceAll*(Pattern,Function) + String replaceFirst*(Pattern,Function) CharSequence subSequence(int,int) String toString() } diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java index e255a776bed..615dec67dc4 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/RegexTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.painless; +import java.nio.CharBuffer; import java.util.Arrays; import java.util.HashSet; import java.util.regex.Pattern; @@ -175,6 +176,61 @@ public class RegexTests extends ScriptTestCase { assertEquals(Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS, exec("/./ciux.flags()")); } + public void testReplaceAllMatchesString() { + assertEquals("thE qUIck brOwn fOx", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))")); + } + + public void testReplaceAllMatchesCharSequence() { + CharSequence charSequence = CharBuffer.wrap("the quick brown fox"); + assertEquals("thE qUIck brOwn fOx", + exec("params.a.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence))); + } + + public void 
testReplaceAllNoMatchString() { + assertEquals("i am cat", exec("'i am cat'.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))")); + } + + public void testReplaceAllNoMatchCharSequence() { + CharSequence charSequence = CharBuffer.wrap("i am cat"); + assertEquals("i am cat", + exec("params.a.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence))); + } + + public void testReplaceAllQuoteReplacement() { + assertEquals("th/E q/U/Ick br/Own f/Ox", + exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))")); + assertEquals("th$E q$U$Ick br$Own f$Ox", + exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))")); + } + + public void testReplaceFirstMatchesString() { + assertEquals("thE quick brown fox", + exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))")); + } + + public void testReplaceFirstMatchesCharSequence() { + CharSequence charSequence = CharBuffer.wrap("the quick brown fox"); + assertEquals("thE quick brown fox", + exec("params.a.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence))); + } + + public void testReplaceFirstNoMatchString() { + assertEquals("i am cat", exec("'i am cat'.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))")); + } + + public void testReplaceFirstNoMatchCharSequence() { + CharSequence charSequence = CharBuffer.wrap("i am cat"); + assertEquals("i am cat", + exec("params.a.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence))); + } + + public void testReplaceFirstQuoteReplacement() { + assertEquals("th/E quick brown fox", + exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))")); + assertEquals("th$E quick brown fox", + exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '$' + 
m.group().toUpperCase(Locale.ROOT))")); + } + public void testCantUsePatternCompile() { IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> { exec("Pattern.compile('aa')");