SOLR-5992: add removeregex as an atomic update operation, Thanks Vitaliy

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1633019 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2014-10-19 22:45:31 +00:00
parent 04f0ea088b
commit 72df721236
3 changed files with 108 additions and 0 deletions

View File

@ -181,6 +181,8 @@ New Features
* SOLR-6517: CollectionsAPI call REBALANCELEADERS. Used to balance leaders
across nodes for a particular collection
* SOLR-5992: add "removeregex" as an atomic update operation
(Vitaliy Zhovtyuk via Erick Erickson)
Bug Fixes
----------------------

View File

@ -26,6 +26,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@ -34,6 +35,8 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -1146,6 +1149,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
updateField = true;
doRemove(oldDoc, sif, fieldVal, schema);
break;
case "removeregex":
updateField = true;
doRemoveRegex(oldDoc, sif, fieldVal);
break;
case "inc":
updateField = true;
doInc(oldDoc, schema, sif, fieldVal);
@ -1225,6 +1232,38 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
return oldSize > existingField.getValueCount();
}
private void doRemoveRegex(SolrInputDocument oldDoc, SolrInputField sif, Object valuePatterns) {
final String name = sif.getName();
final SolrInputField existingField = oldDoc.get(name);
if (existingField != null) {
final Collection<Object> valueToRemove = new HashSet<>();
final Collection<Object> original = existingField.getValues();
final Collection<Pattern> patterns = preparePatterns(valuePatterns);
for (Object value : original) {
for(Pattern pattern : patterns) {
final Matcher m = pattern.matcher(value.toString());
if (m.matches()) {
valueToRemove.add(value);
}
}
}
original.removeAll(valueToRemove);
oldDoc.setField(name, original);
}
}
private Collection<Pattern> preparePatterns(Object fieldVal) {
final Collection<Pattern> patterns = new LinkedHashSet<>(1);
if (fieldVal instanceof Collection) {
Collection<String> patternVals = (Collection<String>) fieldVal;
for (String patternVal : patternVals) {
patterns.add(Pattern.compile(patternVal));
}
} else {
patterns.add(Pattern.compile(fieldVal.toString()));
}
return patterns;
}
@Override
public void processDelete(DeleteUpdateCommand cmd) throws IOException {

View File

@ -815,6 +815,73 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
assertQ(req("q", "floatRemove:\"111.111\"", "indent", "true"), "//result[@numFound = '3']");
}
@Test
public void testRemoveregex() throws Exception {
SolrInputDocument doc;
doc = new SolrInputDocument();
doc.setField("id", "1");
doc.setField("cat", new String[]{"aaa", "bbb", "ccc", "ccc", "ddd"});
assertU(adoc(doc));
doc = new SolrInputDocument();
doc.setField("id", "2");
doc.setField("cat", new String[]{"aaa", "bbb", "bbb", "ccc", "ddd"});
assertU(adoc(doc));
doc = new SolrInputDocument();
doc.setField("id", "20");
doc.setField("cat", new String[]{"aaa", "ccc", "ddd"});
assertU(adoc(doc));
doc = new SolrInputDocument();
doc.setField("id", "21");
doc.setField("cat", new String[]{"aaa", "bbb", "ddd"});
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '3']");
doc = new SolrInputDocument();
doc.setField("id", "1");
List<String> removeList = new ArrayList<>();
removeList.add(".b.");
removeList.add("c+c");
doc.setField("cat", ImmutableMap.of("removeregex", removeList)); //behavior when hitting Solr through ZK
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '2']");
doc = new SolrInputDocument();
doc.setField("id", "21");
removeList = new ArrayList<>();
removeList.add("bb*");
removeList.add("cc+");
doc.setField("cat", ImmutableMap.of("removeregex", removeList)); //behavior when hitting Solr through ZK
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']");
doc = new SolrInputDocument();
doc.setField("id", "1");
doc.setField("cat", ImmutableMap.of("removeregex", "a.a")); //behavior when hitting Solr directly
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "cat:aaa", "indent", "true"), "//result[@numFound = '3']");
}
@Test
public void testAdd() throws Exception {
SolrInputDocument doc = new SolrInputDocument();