mirror of https://github.com/apache/lucene.git
SOLR-4864: RegexReplaceProcessorFactory should support pattern capture group substitution in replacement string.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1586093 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0b728e5336
commit
f58f711f73
|
@ -175,6 +175,10 @@ New Features
|
|||
java -Durl="http://username:password@hostname:8983/solr/update" -jar post.jar sample.xml
|
||||
(Sameer Maggon via Uwe Schindler)
|
||||
|
||||
* SOLR-4864: RegexReplaceProcessorFactory should support pattern capture group
|
||||
substitution in replacement string.
|
||||
(Sunil Srinivasan, Jack Krupansky via Steve Rowe)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -35,12 +35,22 @@ import org.slf4j.LoggerFactory;
|
|||
/**
|
||||
* An update processor that applies a configured regex to any
|
||||
* CharSequence values found in the selected fields, and replaces
|
||||
* any matches with the configured replacement string
|
||||
* any matches with the configured replacement string.
|
||||
*
|
||||
* <p>
|
||||
* By default this processor applies itself to no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* By default, <code>literalReplacement</code> is set to true, in which
|
||||
* case, the <code>replacement</code> string will be treated literally by
|
||||
* quoting via {@link Matcher#quoteReplacement(String)}, so that '\'
|
||||
* and '$' characters are inserted verbatim. When <code>literalReplacement</code>
|
||||
* is set to false, one can perform backreference operations and capture
|
||||
* group substitutions.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, with the configuration listed below, any sequence of multiple
|
||||
* whitespace characters found in values for field named <code>title</code>
|
||||
* or <code>content</code> will be replaced by a single space character.
|
||||
|
@ -52,6 +62,7 @@ import org.slf4j.LoggerFactory;
|
|||
* &lt;str name="fieldName"&gt;title&lt;/str&gt;
|
||||
* &lt;str name="pattern"&gt;\s+&lt;/str&gt;
|
||||
* &lt;str name="replacement"&gt; &lt;/str&gt;
|
||||
* &lt;bool name="literalReplacement"&gt;true&lt;/bool&gt;
|
||||
* &lt;/processor&gt;</pre>
|
||||
*
|
||||
* @see java.util.regex.Pattern
|
||||
|
@ -62,9 +73,12 @@ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProce
|
|||
|
||||
private static final String REPLACEMENT_PARAM = "replacement";
|
||||
private static final String PATTERN_PARAM = "pattern";
|
||||
private static final String LITERAL_REPLACEMENT_PARAM = "literalReplacement";
|
||||
|
||||
private Pattern pattern;
|
||||
private String replacement;
|
||||
// literalReplacementEnabled defaults to true to preserve backward compatibility
|
||||
private boolean literalReplacementEnabled = true;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
|
@ -89,7 +103,18 @@ public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProce
|
|||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Missing required init parameter: " + REPLACEMENT_PARAM);
|
||||
}
|
||||
|
||||
Boolean literalReplacement = args.removeBooleanArg(LITERAL_REPLACEMENT_PARAM);
|
||||
|
||||
if (literalReplacement != null) {
|
||||
literalReplacementEnabled = literalReplacement;
|
||||
}
|
||||
|
||||
if (literalReplacementEnabled) {
|
||||
replacement = Matcher.quoteReplacement(replacementParam.toString());
|
||||
} else {
|
||||
replacement = replacementParam.toString();
|
||||
}
|
||||
|
||||
super.init(args);
|
||||
}
|
||||
|
|
|
@ -412,6 +412,45 @@
|
|||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="regex-replace-literal-true">
|
||||
<processor class="solr.RegexReplaceProcessorFactory">
|
||||
<str name="fieldName">content</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="pattern">(try)</str>
|
||||
<str name="replacement">&lt;$1&gt;</str>
|
||||
<bool name="literalReplacement">true</bool>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="regex-replace-literal-default-true">
|
||||
<processor class="solr.RegexReplaceProcessorFactory">
|
||||
<str name="fieldName">content</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="pattern">(try)</str>
|
||||
<str name="replacement">&lt;$1&gt;</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="regex-replace-literal-false">
|
||||
<processor class="solr.RegexReplaceProcessorFactory">
|
||||
<str name="fieldName">content</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="pattern">(try)</str>
|
||||
<str name="replacement">&lt;$1&gt;</str>
|
||||
<bool name="literalReplacement">false</bool>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="regex-replace-literal-str-true">
|
||||
<processor class="solr.RegexReplaceProcessorFactory">
|
||||
<str name="fieldName">content</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="pattern">(try)</str>
|
||||
<str name="replacement">&lt;$1&gt;</str>
|
||||
<str name="literalReplacement">true</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="default-values">
|
||||
<processor class="solr.DefaultValueUpdateProcessorFactory">
|
||||
<str name="fieldName">processor_default_s</str>
|
||||
|
|
|
@ -367,6 +367,59 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
|
|||
d.getFieldValue("content"));
|
||||
assertEquals("ThisXtitleXhasXaXlotXofXspaces",
|
||||
d.getFieldValue("title"));
|
||||
|
||||
// literalReplacement = true
|
||||
d = processAdd("regex-replace-literal-true",
|
||||
doc(f("id", "doc2"),
|
||||
f("content", "Let's try this one"),
|
||||
f("title", "Let's try try this one")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("Let's <$1> this one",
|
||||
d.getFieldValue("content"));
|
||||
assertEquals("Let's <$1> <$1> this one",
|
||||
d.getFieldValue("title"));
|
||||
|
||||
// literalReplacement is not specified, defaults to true
|
||||
d = processAdd("regex-replace-literal-default-true",
|
||||
doc(f("id", "doc3"),
|
||||
f("content", "Let's try this one"),
|
||||
f("title", "Let's try try this one")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("Let's <$1> this one",
|
||||
d.getFieldValue("content"));
|
||||
assertEquals("Let's <$1> <$1> this one",
|
||||
d.getFieldValue("title"));
|
||||
|
||||
// if user passes literalReplacement as a string param instead of boolean
|
||||
d = processAdd("regex-replace-literal-str-true",
|
||||
doc(f("id", "doc4"),
|
||||
f("content", "Let's try this one"),
|
||||
f("title", "Let's try try this one")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("Let's <$1> this one",
|
||||
d.getFieldValue("content"));
|
||||
assertEquals("Let's <$1> <$1> this one",
|
||||
d.getFieldValue("title"));
|
||||
|
||||
// This is with literalReplacement = false
|
||||
d = processAdd("regex-replace-literal-false",
|
||||
doc(f("id", "doc5"),
|
||||
f("content", "Let's try this one"),
|
||||
f("title", "Let's try try this one")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("Let's <try> this one",
|
||||
d.getFieldValue("content"));
|
||||
assertEquals("Let's <try> <try> this one",
|
||||
d.getFieldValue("title"));
|
||||
|
||||
}
|
||||
|
||||
public void testFirstValue() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue