From 50a04c077fc4ee76e86df33e3b35aa876f71ed77 Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Mon, 5 Mar 2018 21:37:10 +1100 Subject: [PATCH] SOLR-11267: Add support for "add-distinct" atomic update operation --- solr/CHANGES.txt | 2 + .../processor/AtomicUpdateDocumentMerger.java | 40 ++++++++++++++++ .../update/processor/AtomicUpdatesTest.java | 48 +++++++++++++++++++ .../src/updating-parts-of-documents.adoc | 6 +++ 4 files changed, 96 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c7a7446fb32..d5313117a16 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -182,6 +182,8 @@ New Features * SOLR-11795: Add Solr metrics exporter for Prometheus (Minoru Osuka via koji) +* SOLR-11267: Add support for "add-distinct" atomic update operation (Amrit Sarkar via noble ) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java index 2689f0e80c2..1198bc93466 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java +++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java @@ -18,6 +18,7 @@ package org.apache.solr.update.processor; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -116,6 +117,10 @@ public class AtomicUpdateDocumentMerger { updateField = true; doInc(toDoc, sif, fieldVal); break; + case "add-distinct": + updateField = true; + doAddDistinct(toDoc, sif, fieldVal); + break; default: //Perhaps throw an error here instead? log.warn("Unknown operation for the an atomic update, operation ignored: " + key); @@ -317,6 +322,41 @@ public class AtomicUpdateDocumentMerger { toDoc.addField(sif.getName(), sf.getType().toNativeType(fieldVal)); } + protected void doAddDistinct(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { + final String name = sif.getName(); + SolrInputField existingField = toDoc.get(name); + + SchemaField sf = schema.getField(name); + + if (sf != null) { + Collection original = existingField != null ? + existingField.getValues() : + new ArrayList<>(); + + int initialSize = original.size(); + if (fieldVal instanceof Collection) { + for (Object object : (Collection) fieldVal) { + if (!original.contains(object)) { + original.add(object); + } + } + } else { + Object object = sf.getType().toNativeType(fieldVal); + if (!original.contains(object)) { + original.add(object); + } + } + + if (original.size() > initialSize) { // update only if more are added + if (original.size() == 1) { // if single value, pass the value instead of List + doAdd(toDoc, sif, original.toArray()[0]); + } else { + toDoc.setField(name, original); + } + } + } + } + protected void doInc(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) { SolrInputField numericField = toDoc.get(sif.getName()); SchemaField sf = schema.getField(sif.getName()); diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java index fefbe5d3cf6..62df01784a4 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java @@ -926,6 +926,54 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 { assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']"); } + @Test + public void testAddDistinct() throws Exception { + SolrInputDocument doc = new SolrInputDocument(); + doc.setField("id", "3"); + doc.setField("cat", new String[]{"aaa", "ccc"}); + assertU(adoc(doc)); + + doc = new SolrInputDocument(); + doc.setField("id", "4"); + doc.setField("cat", new String[]{"aaa", "ccc"}); + assertU(adoc(doc)); + + assertU(commit()); + + assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '2']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '0']"); + + + doc = new SolrInputDocument(); + doc.setField("id", "3"); + doc.setField("cat", ImmutableMap.of("add-distinct", "bbb")); + assertU(adoc(doc)); + assertU(commit()); + + assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '2']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//doc/arr[@name='cat'][count(str)=3]"); + + doc = new SolrInputDocument(); + doc.setField("id", "3"); + doc.setField("cat", ImmutableMap.of("add-distinct", Arrays.asList(new String[]{"bbb", "bbb"}))); + assertU(adoc(doc)); + assertU(commit()); + + assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '2']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '1']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//doc/arr[@name='cat'][count(str)=3]"); //'bbb' already present will not be added again + + doc = new SolrInputDocument(); + doc.setField("id", "5"); + doc.setField("cat", ImmutableMap.of("add-distinct", "bbb")); + assertU(adoc(doc)); + assertU(commit()); + + assertQ(req("q", "cat:*", "indent", "true"), "//result[@numFound = '3']"); + assertQ(req("q", "cat:bbb", "indent", "true"), "//result[@numFound = '2']"); //'cat' field not present, do 'add' atomic operation + } + @Test public void testSet() throws Exception { SolrInputDocument doc; diff --git a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc index 3efc00adc79..5e25d51f775 100644 --- a/solr/solr-ref-guide/src/updating-parts-of-documents.adoc +++ b/solr/solr-ref-guide/src/updating-parts-of-documents.adoc @@ -40,6 +40,9 @@ May be specified as a single value, or as a list for multiValued fields. `add`:: Adds the specified values to a multiValued field. May be specified as a single value, or as a list. +`add-distinct`:: +Adds the specified values to a multiValued field, only if not already present. May be specified as a single value, or as a list. + `remove`:: Removes (all occurrences of) the specified values from a multiValued field. May be specified as a single value, or as a list. @@ -67,6 +70,7 @@ If the following document exists in our collection: "price":10, "popularity":42, "categories":["kids"], + "sub_categories":["under_5","under_10"], "promo_ids":["a123x"], "tags":["free_to_try","buy_now","clearance","on_sale"] } @@ -80,6 +84,7 @@ And we apply the following update command: "price":{"set":99}, "popularity":{"inc":20}, "categories":{"add":["toys","games"]}, + "sub_categories":{"add-distinct":"under_10"}, "promo_ids":{"remove":"a123x"}, "tags":{"remove":["free_to_try","on_sale"]} } @@ -93,6 +98,7 @@ The resulting document in our collection will be: "price":99, "popularity":62, "categories":["kids","toys","games"], + "sub_categories":["under_5","under_10"], "tags":["buy_now","clearance"] } ----