diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d23c902e438..f7001bcb34d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -175,6 +175,10 @@ New Features java -Durl="http://username:password@hostname:8983/solr/update" -jar post.jar sample.xml (Sameer Maggon via Uwe Schindler) +* SOLR-4864: RegexReplaceProcessorFactory should support pattern capture group + substitution in replacement string. + (Sunil Srinivasan, Jack Krupansky via Steve Rowe) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java index f1561b445ac..129a147621a 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java @@ -35,12 +35,22 @@ import org.slf4j.LoggerFactory; /** * An updated processor that applies a configured regex to any * CharSequence values found in the selected fields, and replaces - * any matches with the configured replacement string + * any matches with the configured replacement string. + * *

* By default this processor applies itself to no fields. *

* *

+ * By default, literalReplacement is set to true, in which + * case, the replacement string will be treated literally by + * quoting via {@link Matcher#quoteReplacement(String)}. And hence, '\' + * and '$' signs will not be processed. When literalReplacement + * is set to false, one can perform backreference operations and capture + * group substitutions. + *

+ * + *

* For example, with the configuration listed below, any sequence of multiple * whitespace characters found in values for field named title * or content will be replaced by a single space character. @@ -52,8 +62,9 @@ import org.slf4j.LoggerFactory; * <str name="fieldName">title</str> * <str name="pattern">\s+</str> * <str name="replacement"> </str> + * <bool name="literalReplacement">true</bool> * </processor> - * + * * @see java.util.regex.Pattern */ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProcessorFactory { @@ -62,9 +73,12 @@ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProce private static final String REPLACEMENT_PARAM = "replacement"; private static final String PATTERN_PARAM = "pattern"; + private static final String LITERAL_REPLACEMENT_PARAM = "literalReplacement"; private Pattern pattern; private String replacement; + // by default, literalReplacementEnabled is set to true to allow backward compatibility + private boolean literalReplacementEnabled = true; @SuppressWarnings("unchecked") @Override @@ -72,7 +86,7 @@ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProce Object patternParam = args.remove(PATTERN_PARAM); - if(patternParam == null) { + if (patternParam == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + PATTERN_PARAM); } @@ -85,11 +99,22 @@ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProce } Object replacementParam = args.remove(REPLACEMENT_PARAM); - if(replacementParam == null) { + if (replacementParam == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "Missing required init parameter: " + REPLACEMENT_PARAM); } - replacement = Matcher.quoteReplacement(replacementParam.toString()); + + Boolean literalReplacement = args.removeBooleanArg(LITERAL_REPLACEMENT_PARAM); + + if (literalReplacement != null) { + literalReplacementEnabled = literalReplacement; + } + + if (literalReplacementEnabled) { + replacement = Matcher.quoteReplacement(replacementParam.toString()); + } else { + replacement = replacementParam.toString(); + } super.init(args); } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml index 1b99f61dc36..a5ffdd4488a 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml @@ -412,6 +412,45 @@ + + + content + title + (try) + <$1> + true + + + + + + content + title + (try) + <$1> + + + + + + content + title + (try) + <$1> + false + + + + + + content + title + (try) + <$1> + true + + + processor_default_s diff --git a/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java index 48ac66c9f78..d731dc9c643 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java @@ -367,6 +367,59 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase { d.getFieldValue("content")); assertEquals("ThisXtitleXhasXaXlotXofXspaces", d.getFieldValue("title")); + + // literalReplacement = true + d = processAdd("regex-replace-literal-true", + doc(f("id", "doc2"), + f("content", "Let's try this one"), + f("title", "Let's try try this one"))); + + assertNotNull(d); + + assertEquals("Let's <$1> this one", + d.getFieldValue("content")); + assertEquals("Let's <$1> <$1> this one", + d.getFieldValue("title")); + + // literalReplacement is not specified, defaults to true + d = processAdd("regex-replace-literal-default-true", + doc(f("id", "doc3"), + f("content", "Let's try this one"), + f("title", "Let's try try this one"))); + + assertNotNull(d); + + assertEquals("Let's <$1> this one", + d.getFieldValue("content")); + assertEquals("Let's <$1> <$1> this one", + d.getFieldValue("title")); + + // if user passes literalReplacement as a string param instead of boolean + d = processAdd("regex-replace-literal-str-true", + doc(f("id", "doc4"), + f("content", "Let's try this one"), + f("title", "Let's try try this one"))); + + assertNotNull(d); + + assertEquals("Let's <$1> this one", + d.getFieldValue("content")); + assertEquals("Let's <$1> <$1> this one", + d.getFieldValue("title")); + + // This is with literalReplacement = false + d = processAdd("regex-replace-literal-false", + doc(f("id", "doc5"), + f("content", "Let's try this one"), + f("title", "Let's try try this one"))); + + assertNotNull(d); + + assertEquals("Let's this one", + d.getFieldValue("content")); + assertEquals("Let's this one", + d.getFieldValue("title")); + } public void testFirstValue() throws Exception {