Painless: add "".replaceAll and "".replaceFirst
These are useful methods in groovy that give you control over the replacements used: ``` 'the quick brown fox'.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT)) ```
This commit is contained in:
parent
1aa31ec934
commit
67bfecc070
|
@ -237,8 +237,8 @@ POST hockey/player/_update_by_query
|
|||
----------------------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
Or you can use the `Pattern.matcher` directory to get a `Matcher` instance and
|
||||
remove all of the vowels in all of their names:
|
||||
You can use `Pattern.matcher` directly to get a `Matcher` instance and
|
||||
remove all of the vowels in all of their last names:
|
||||
|
||||
[source,js]
|
||||
----------------------------------------------------------------
|
||||
|
@ -252,6 +252,59 @@ POST hockey/player/_update_by_query
|
|||
----------------------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
`Matcher.replaceAll` is just a call to Java's `Matcher`'s
|
||||
http://docs.oracle.com/javase/8/docs/api/java/util/regex/Matcher.html#replaceAll-java.lang.String-[replaceAll]
|
||||
method so it supports `$1`-style group references in replacements (a `\` only escapes the character that follows it):
|
||||
|
||||
[source,js]
|
||||
----------------------------------------------------------------
|
||||
POST hockey/player/_update_by_query
|
||||
{
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": "ctx._source.last = /n([aeiou])/.matcher(ctx._source.last).replaceAll('$1')"
|
||||
}
|
||||
}
|
||||
----------------------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
If you need more control over replacements you can call `replaceAll` on a
|
||||
`CharSequence` with a `Function<Matcher, String>` that builds the replacement.
|
||||
This does not support `$1` or `\1` to access captured groups because you already
|
||||
have a reference to the matcher and can get them with `m.group(1)`.
|
||||
|
||||
IMPORTANT: Calling `Matcher.find` inside of the function that builds the
|
||||
replacement is rude and will likely break the replacement process.
|
||||
|
||||
This will make all of the vowels in the hockey player's last names upper case:
|
||||
|
||||
[source,js]
|
||||
----------------------------------------------------------------
|
||||
POST hockey/player/_update_by_query
|
||||
{
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": "ctx._source.last = ctx._source.last.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"
|
||||
}
|
||||
}
|
||||
----------------------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
Or you can use `CharSequence.replaceFirst` to make the first vowel in their
|
||||
last names upper case:
|
||||
|
||||
[source,js]
|
||||
----------------------------------------------------------------
|
||||
POST hockey/player/_update_by_query
|
||||
{
|
||||
"script": {
|
||||
"lang": "painless",
|
||||
"inline": "ctx._source.last = ctx._source.last.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"
|
||||
}
|
||||
}
|
||||
----------------------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
|
||||
Note: all of the `_update_by_query` examples above could really do with a
|
||||
`query` to limit the data that they pull back. While you *could* use a
|
||||
|
|
|
@ -34,6 +34,7 @@ import java.util.function.ObjIntConsumer;
|
|||
import java.util.function.Predicate;
|
||||
import java.util.function.ToDoubleFunction;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** Additional methods added to classes. These must be static methods with receiver as first argument */
|
||||
public class Augmentation {
|
||||
|
@ -442,4 +443,47 @@ public class Augmentation {
|
|||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
// CharSequence augmentation
|
||||
/**
|
||||
* Replace all matches. Similar to {@link Matcher#replaceAll(String)} but allows you to customize the replacement based on the match.
|
||||
*/
|
||||
public static String replaceAll(CharSequence receiver, Pattern pattern, Function<Matcher, String> replacementBuilder) {
|
||||
Matcher m = pattern.matcher(receiver);
|
||||
if (false == m.find()) {
|
||||
// CharSequqence's toString is *supposed* to always return the characters in the sequence as a String
|
||||
return receiver.toString();
|
||||
}
|
||||
StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
|
||||
do {
|
||||
m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
|
||||
} while (m.find());
|
||||
m.appendTail(result);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace the first match. Similar to {@link Matcher#replaceFirst(String)} but allows you to customize the replacement based on the
|
||||
* match.
|
||||
*/
|
||||
public static String replaceFirst(CharSequence receiver, Pattern pattern, Function<Matcher, String> replacementBuilder) {
|
||||
Matcher m = pattern.matcher(receiver);
|
||||
if (false == m.find()) {
|
||||
// CharSequqence's toString is *supposed* to always return the characters in the sequence as a String
|
||||
return receiver.toString();
|
||||
}
|
||||
StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
|
||||
m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
|
||||
m.appendTail(result);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* The initial size of the {@link StringBuilder} used for {@link #replaceFirst(CharSequence, Pattern, Function)} and
|
||||
* {@link #replaceAll(CharSequence, Pattern, Function)} for a particular sequence. We ape
|
||||
* {{@link StringBuilder#StringBuilder(CharSequence)} here and add 16 extra chars to the buffer to have a little room for growth.
|
||||
*/
|
||||
private static int initialBufferForReplaceWith(CharSequence seq) {
|
||||
return seq.length() + 16;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,8 @@ class CharSequence -> java.lang.CharSequence {
|
|||
IntStream chars()
|
||||
IntStream codePoints()
|
||||
int length()
|
||||
String replaceAll*(Pattern,Function)
|
||||
String replaceFirst*(Pattern,Function)
|
||||
CharSequence subSequence(int,int)
|
||||
String toString()
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.painless;
|
||||
|
||||
import java.nio.CharBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -175,6 +176,61 @@ public class RegexTests extends ScriptTestCase {
|
|||
assertEquals(Pattern.CANON_EQ | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.COMMENTS, exec("/./ciux.flags()"));
|
||||
}
|
||||
|
||||
public void testReplaceAllMatchesString() {
|
||||
assertEquals("thE qUIck brOwn fOx", exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testReplaceAllMatchesCharSequence() {
|
||||
CharSequence charSequence = CharBuffer.wrap("the quick brown fox");
|
||||
assertEquals("thE qUIck brOwn fOx",
|
||||
exec("params.a.replaceAll(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
|
||||
}
|
||||
|
||||
public void testReplaceAllNoMatchString() {
|
||||
assertEquals("i am cat", exec("'i am cat'.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testReplaceAllNoMatchCharSequence() {
|
||||
CharSequence charSequence = CharBuffer.wrap("i am cat");
|
||||
assertEquals("i am cat",
|
||||
exec("params.a.replaceAll(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
|
||||
}
|
||||
|
||||
public void testReplaceAllQuoteReplacement() {
|
||||
assertEquals("th/E q/U/Ick br/Own f/Ox",
|
||||
exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))"));
|
||||
assertEquals("th$E q$U$Ick br$Own f$Ox",
|
||||
exec("'the quick brown fox'.replaceAll(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testReplaceFirstMatchesString() {
|
||||
assertEquals("thE quick brown fox",
|
||||
exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testReplaceFirstMatchesCharSequence() {
|
||||
CharSequence charSequence = CharBuffer.wrap("the quick brown fox");
|
||||
assertEquals("thE quick brown fox",
|
||||
exec("params.a.replaceFirst(/[aeiou]/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
|
||||
}
|
||||
|
||||
public void testReplaceFirstNoMatchString() {
|
||||
assertEquals("i am cat", exec("'i am cat'.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testReplaceFirstNoMatchCharSequence() {
|
||||
CharSequence charSequence = CharBuffer.wrap("i am cat");
|
||||
assertEquals("i am cat",
|
||||
exec("params.a.replaceFirst(/dolphin/, m -> m.group().toUpperCase(Locale.ROOT))", singletonMap("a", charSequence)));
|
||||
}
|
||||
|
||||
public void testReplaceFirstQuoteReplacement() {
|
||||
assertEquals("th/E quick brown fox",
|
||||
exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '/' + m.group().toUpperCase(Locale.ROOT))"));
|
||||
assertEquals("th$E quick brown fox",
|
||||
exec("'the quick brown fox'.replaceFirst(/[aeiou]/, m -> '$' + m.group().toUpperCase(Locale.ROOT))"));
|
||||
}
|
||||
|
||||
public void testCantUsePatternCompile() {
|
||||
IllegalArgumentException e = expectScriptThrows(IllegalArgumentException.class, () -> {
|
||||
exec("Pattern.compile('aa')");
|
||||
|
|
Loading…
Reference in New Issue