From a7197ac0ce8333ce7019f49c6fab690a96ff7d77 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 11 Nov 2020 12:28:11 -0500 Subject: [PATCH] SOLR-14971: Handle atomic-removes on uncommitted docs (#2056) Docs fetched from the update log via RTG look different than docs fetched from commits in the index: the types of field-values may be different between the two, etc. This is a problem for atomic add/remove of field values, where matching existing values has historically been done by object equals() calls (via Collection operations). This relies on equality checks which don't have flexible enough semantics to match values across these different types. (For example, `new Long(1).equals(new Integer(1))` returns `false`). This was causing some add-distinct and remove operations on uncommitted values to silently fail to remove field values. This commit patches over this by converting between types in the more common cases before using the fallback behavior. --- solr/CHANGES.txt | 2 + .../processor/AtomicUpdateDocumentMerger.java | 121 ++++-- .../processor/AtomicUpdateJavabinTest.java | 370 ++++++++++++++++++ .../AtomicUpdateRemovalJavabinTest.java | 132 ------- .../update/processor/AtomicUpdatesTest.java | 32 ++ 5 files changed, 495 insertions(+), 162 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java delete mode 100644 solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 69e4b633abb..96a721af002 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -192,6 +192,8 @@ Bug Fixes * SOLR-14961: Fix for deleting zookeeper nodes with same path length. Only the first zk-node was removed. (Michael Aleythe via Mike Drob) +* SOLR-14971: AtomicUpdate 'remove', 'add-distinct' operations now works on numeric, date fields in uncommitted docs (Jason Gerlowski) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java index 673c4fae191..bdc4a72aaf8 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java +++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java @@ -16,21 +16,6 @@ */ package org.apache.solr.update.processor; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Stream; - import org.apache.commons.lang3.tuple.Pair; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.BytesRef; @@ -53,10 +38,20 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.NumericValueFieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.util.DateMathParser; import org.apache.solr.util.RefCounted; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.*; +import java.util.Map.Entry; +import java.util.function.BiConsumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Stream; + import static org.apache.solr.common.params.CommonParams.ID; /** @@ -189,7 +184,7 @@ public class AtomicUpdateDocumentMerger { if (fieldName.equals(uniqueKeyFieldName) || fieldName.equals(CommonParams.VERSION_FIELD) || fieldName.equals(routeFieldOrNull)) { - if (fieldValue instanceof Map ) { + if (fieldValue instanceof Map) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Updating unique key, version or route field is not allowed: " + sdoc.getField(fieldName)); } else { @@ -450,9 +445,6 @@ public class AtomicUpdateDocumentMerger { final String name = sif.getName(); SolrInputField existingField = toDoc.get(name); - // throws exception if field doesn't exist - SchemaField sf = schema.getField(name); - Collection original = existingField != null ? existingField.getValues() : new ArrayList<>(); @@ -460,16 +452,10 @@ public class AtomicUpdateDocumentMerger { int initialSize = original.size(); if (fieldVal instanceof Collection) { for (Object object : (Collection) fieldVal) { - Object obj = sf.getType().toNativeType(object); - if (!original.contains(obj)) { - original.add(obj); - } + addValueIfDistinct(name, original, object); } } else { - Object object = sf.getType().toNativeType(fieldVal); - if (!original.contains(object)) { - original.add(object); - } + addValueIfDistinct(name, original, fieldVal); } if (original.size() > initialSize) { // update only if more are added @@ -516,7 +502,7 @@ public class AtomicUpdateDocumentMerger { SolrInputField existingField = toDoc.get(name); if (existingField == null) return; @SuppressWarnings({"rawtypes"}) - final Collection original = existingField.getValues(); + final Collection original = existingField.getValues(); if (fieldVal instanceof Collection) { for (Object object : (Collection) fieldVal) { removeObj(original, object, name); @@ -582,11 +568,11 @@ public class AtomicUpdateDocumentMerger { return objValues.iterator().next() instanceof SolrDocumentBase; } - private void removeObj(@SuppressWarnings({"rawtypes"})Collection original, Object toRemove, String fieldName) { + private void removeObj(Collection original, Object toRemove, String fieldName) { if(isChildDoc(toRemove)) { removeChildDoc(original, (SolrInputDocument) toRemove); } else { - original.remove(getNativeFieldValue(fieldName, toRemove)); + removeFieldValueWithNumericFudging(fieldName, original, toRemove); } } @@ -600,6 +586,81 @@ public class AtomicUpdateDocumentMerger { } } + private void removeFieldValueWithNumericFudging(String fieldName, @SuppressWarnings({"rawtypes"}) Collection original, Object toRemove) { + if (original.size() == 0) { + return; + } + + final BiConsumer, Object> removePredicate = (coll, existingElement) -> coll.remove(existingElement); + modifyCollectionBasedOnFuzzyPresence(fieldName, original, toRemove, removePredicate, null); + } + + private void addValueIfDistinct(String fieldName, Collection original, Object toAdd) { + final BiConsumer, Object> addPredicate = (coll, newElement) -> coll.add(newElement); + modifyCollectionBasedOnFuzzyPresence(fieldName, original, toAdd, null, addPredicate); + } + + /** + * Modifies a collection based on the (loosely-judged) presence or absence of a specific value + * + * Several classes of atomic update (notably 'remove' and 'add-distinct') rely on being able to identify whether an + * item is already present in a given list of values. Unfortunately the 'item' being checked for may be of different + * types based on the format of the user request and on where the existing document was pulled from (tlog vs index). + * As a result atomic updates needs a "fuzzy" way of checking presence and equality that is more flexible than + * traditional equality checks allow. This method does light type-checking to catch some of these more common cases + * (Long compared against Integers, String compared against Date, etc.), and calls the provided lambda to modify the + * field values as necessary. + * + * @param fieldName the field name involved in this atomic update operation + * @param original the list of values currently present in the existing document + * @param rawValue a value to be checked for in 'original' + * @param ifPresent a function to execute if rawValue was found in 'original' + * @param ifAbsent a function to execute if rawValue was not found in 'original' + */ + private void modifyCollectionBasedOnFuzzyPresence(String fieldName, Collection original, Object rawValue, + BiConsumer, Object> ifPresent, + BiConsumer, Object> ifAbsent) { + Object nativeValue = getNativeFieldValue(fieldName, rawValue); + Optional matchingValue = findObjectWithTypeFuzziness(original, rawValue, nativeValue); + if (matchingValue.isPresent() && ifPresent != null) { + ifPresent.accept(original, matchingValue.get()); + } else if(matchingValue.isEmpty() && ifAbsent != null) { + ifAbsent.accept(original, rawValue); + } + } + + private Optional findObjectWithTypeFuzziness(Collection original, Object rawValue, Object nativeValue) { + if (nativeValue instanceof Double || nativeValue instanceof Float) { + final Number nativeAsNumber = (Number) nativeValue; + return original.stream().filter(val -> + val.equals(rawValue) || + val.equals(nativeValue) || + (val instanceof Number && ((Number) val).doubleValue() == nativeAsNumber.doubleValue()) || + (val instanceof String && val.equals(nativeAsNumber.toString()))) + .findFirst(); + } else if (nativeValue instanceof Long || nativeValue instanceof Integer) { + final Number nativeAsNumber = (Number) nativeValue; + return original.stream().filter(val -> + val.equals(rawValue) || + val.equals(nativeValue) || + (val instanceof Number && ((Number) val).longValue() == nativeAsNumber.longValue()) || + (val instanceof String && val.equals(nativeAsNumber.toString()))) + .findFirst(); + } else if (nativeValue instanceof Date) { + return original.stream().filter(val -> + val.equals(rawValue) || + val.equals(nativeValue) || + (val instanceof String && DateMathParser.parseMath(null, (String)val).equals(nativeValue))) + .findFirst(); + } else if (original.contains(nativeValue)) { + return Optional.of(nativeValue); + } else if (original.contains(rawValue)) { + return Optional.of(rawValue); + } else { + return Optional.empty(); + } + } + /** * * @param doc document to search for diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java new file mode 100644 index 00000000000..758de5f37ab --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateJavabinTest.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.time.Instant; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +/** + * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin + * + * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent + * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists + * after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example. + */ +public class AtomicUpdateJavabinTest extends SolrCloudTestCase { + private static final String COMMITTED_DOC_ID = "1"; + private static final String COMMITTED_DOC_STR_VALUES_ID = "1s"; + private static final String UNCOMMITTED_DOC_ID = "2"; + private static final String UNCOMMITTED_DOC_STR_VALUES_ID = "2s"; + private static final String COLLECTION = "collection1"; + private static final int NUM_SHARDS = 1; + private static final int NUM_REPLICAS = 1; + private static final Date DATE_1 = Date.from(Instant.ofEpochSecond(1554243309)); + private static final String DATE_1_STR = "2019-04-02T22:15:09Z"; + private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243609)); + private static final String DATE_2_STR = "2019-04-02T22:20:09Z"; + private static final Date DATE_3 = Date.from(Instant.ofEpochSecond(1554243909)); + private static final String DATE_3_STR = "2019-04-02T22:25:09Z"; + + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1) + .addConfig("conf", configset("cloud-dynamic")) + .configure(); + + CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS) + .process(cluster.getSolrClient()); + + cluster.waitForActiveCollection(COLLECTION, 1, 1); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + + final SolrInputDocument committedDoc = sdoc( + "id", COMMITTED_DOC_ID, + "title_s", "title_1", "title_s", "title_2", + "tv_mv_text", "text_1", "tv_mv_text", "text_2", + "count_is", 1, "count_is", 2, + "count_md", 1.0, "count_md", 2.0, + "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2); + final SolrInputDocument committedStrDoc = sdoc( + "id", COMMITTED_DOC_STR_VALUES_ID, + "title_s", "title_1", "title_s", "title_2", + "tv_mv_text", "text_1", "tv_mv_text", "text_2", + "count_is", "1", "count_is", "2", + "count_md", "1.0", "count_md", "2.0", + "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR); + final UpdateRequest committedRequest = new UpdateRequest() + .add(committedDoc) + .add(committedStrDoc); + committedRequest.commit(cluster.getSolrClient(), COLLECTION); + + // Upload a copy of id:1 that's uncommitted to test how atomic-updates modify values in the tlog + // See SOLR-14971 for an example of why this case needs tested separately + final SolrInputDocument uncommittedDoc = sdoc( + "id", UNCOMMITTED_DOC_ID, + "title_s", "title_1", "title_s", "title_2", + "tv_mv_text", "text_1", "tv_mv_text", "text_2", + "count_is", 1, "count_is", 2, + "count_md", 1.0, "count_md", 2.0, + "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2); + final SolrInputDocument uncommittedStrDoc = sdoc( + "id", UNCOMMITTED_DOC_STR_VALUES_ID, + "title_s", "title_1", "title_s", "title_2", + "tv_mv_text", "text_1", "tv_mv_text", "text_2", + "count_is", "1", "count_is", "2", + "count_md", "1.0", "count_md", "2.0", + "timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR); + final UpdateRequest uncommittedRequest = new UpdateRequest() + .add(uncommittedDoc) + .add(uncommittedStrDoc); + uncommittedRequest.process(cluster.getSolrClient(), COLLECTION); + } + + @Test + public void testAtomicUpdateRemovalOfStrField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicRemoveValueFromField(COMMITTED_DOC_ID, "title_s", "title_1"); + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_2"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "title_s", "title_1"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_2"); + } + + @Test + public void testAtomicUpdateRemovalOfTextField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicRemoveValueFromField(COMMITTED_DOC_ID, "tv_mv_text", "text_1"); + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_2"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2"); + } + + @Test + public void testAtomicUpdateRemovalOfIntField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2); + atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_is", 1); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 2); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2); + atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_is", 1); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 2); + } + + @Test + public void testAtomicUpdateRemovalOfDoubleField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_md", 1.0); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 2.0); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_md", 1.0); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 2.0); + } + + @Test + public void testAtomicUpdateRemovalOfDateField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicRemoveValueFromField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1); + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2); + } + + @Test + public void testAtomicUpdateAddDistinctOfDistinctValueOnStrField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_3"); + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_3"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3"); + } + + @Test + public void testAtomicUpdateAddDistinctOfDuplicateValueOnStrField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_2"); + ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_2"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2"); + } + + @Test + public void testAtomicUpdateAddDistinctOfDistinctValueOnTextField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_3"); + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_3"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3"); + } + + @Test + public void testAtomicUpdateAddDistinctOfDuplicateValueOnTextField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_2"); + ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2"); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2"); + } + + @Test + public void testAtomicUpdateAddDistinctOfDistinctValueOnIntField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 3); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2, 3); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 3); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 3); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2, 3); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 3); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3); + } + + @Test + public void testAtomicUpdateAddDistinctOfDuplicateValueOnIntField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 2); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 2); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 2); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 2); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2); + } + + @Test + public void testAtomicUpdateAddDistinctOfDistinctValueOnDoubleField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 3.0); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 3.0); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0); + } + + @Test + public void testAtomicUpdateAddDistinctOfDuplicateValueOnDoubleField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 2.0); + ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 2.0); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0); + } + + @Test + public void testAtomicUpdateAddDistinctOfDistinctValueOnDateField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_3); + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_3); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3); + } + + @Test + public void testAtomicUpdateAddDistinctOfDuplicateValueOnDateField() throws Exception { + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2); + ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2); + ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2); + ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2); + + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2); + ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2); + } + + private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception { + final SolrInputDocument doc = new SolrInputDocument(); + doc.setField("id", docId); + Map atomicUpdateRemoval = new HashMap<>(1); + atomicUpdateRemoval.put("remove", value); + doc.setField(fieldName, atomicUpdateRemoval); + + cluster.getSolrClient().add(COLLECTION, doc); + } + + private void atomicAddDistinctValueToField(String docId, String fieldName, Object value) throws Exception { + final SolrInputDocument doc = new SolrInputDocument(); + doc.setField("id", docId); + Map atomicUpdateRemoval = new HashMap<>(1); + atomicUpdateRemoval.put("add-distinct", value); + doc.setField(fieldName, atomicUpdateRemoval); + + cluster.getSolrClient().add(COLLECTION, doc); + } + + private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception { + final ModifiableSolrParams solrParams = new ModifiableSolrParams(); + solrParams.set("id", identifyingDocId); + QueryRequest request = new QueryRequest(solrParams); + request.setPath("/get"); + final QueryResponse response = request.process(cluster.getSolrClient(), COLLECTION); + + final NamedList rawResponse = response.getResponse(); + assertTrue(rawResponse.get("doc") != null); + assertTrue(rawResponse.get("doc") instanceof SolrDocument); + final SolrDocument doc = (SolrDocument) rawResponse.get("doc"); + final Collection valuesAfterUpdate = doc.getFieldValues(fieldName); + assertEquals("Expected field to have " + expectedValues.length + " values, but found " + valuesAfterUpdate.size(), + expectedValues.length, valuesAfterUpdate.size()); + for (Object expectedValue: expectedValues) { + assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue)); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java deleted file mode 100644 index c028051f644..00000000000 --- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateRemovalJavabinTest.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.solr.update.processor; - -import java.time.Instant; -import java.util.Collection; -import java.util.Date; -import java.util.HashMap; -import java.util.Map; - -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.UpdateRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.cloud.SolrCloudTestCase; -import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.SolrInputDocument; -import org.junit.BeforeClass; -import org.junit.Test; - -/** - * Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin - * - * {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent - * changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists - * after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example. - */ -public class AtomicUpdateRemovalJavabinTest extends SolrCloudTestCase { - private static final String COLLECTION = "collection1"; - private static final int NUM_SHARDS = 1; - private static final int NUM_REPLICAS = 1; - private static final Date DATE_1 = new Date(); - private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243909)); - - @BeforeClass - public static void setupCluster() throws Exception { - configureCluster(1) - .addConfig("conf", configset("cloud-dynamic")) - .configure(); - - CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS) - .process(cluster.getSolrClient()); - - cluster.waitForActiveCollection(COLLECTION, 1, 1); - - final SolrInputDocument doc1 = sdoc( - "id", "1", - "title_s", "title_1", "title_s", "title_2", - "tv_mv_text", "text_1", "tv_mv_text", "text_2", - "count_is", 1, "count_is", 2, - "count_md", 1.0, "count_md", 2.0, - "timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2); - final UpdateRequest req = new UpdateRequest() - .add(doc1); - req.commit(cluster.getSolrClient(), COLLECTION); - } - - @Test - public void testAtomicUpdateRemovalOfStrField() throws Exception { - ensureFieldHasValues("1", "title_s", "title_1", "title_2"); - atomicRemoveValueFromField("1", "title_s", "title_1"); - ensureFieldHasValues("1", "title_s", "title_2"); - } - - @Test - public void testAtomicUpdateRemovalOfTextField() throws Exception { - ensureFieldHasValues("1", "tv_mv_text", "text_1", "text_2"); - atomicRemoveValueFromField("1", "tv_mv_text", "text_1"); - ensureFieldHasValues("1", "tv_mv_text", "text_2"); - } - - @Test - public void testAtomicUpdateRemovalOfIntField() throws Exception { - ensureFieldHasValues("1", "count_is", 1, 2); - atomicRemoveValueFromField("1", "count_is", 1); - ensureFieldHasValues("1", "count_is", 2); - } - - @Test - public void testAtomicUpdateRemovalOfDoubleField() throws Exception { - ensureFieldHasValues("1", "count_md", 1.0, 2.0); - atomicRemoveValueFromField("1", "count_md", 1.0); - ensureFieldHasValues("1", "count_md", 2.0); - } - - @Test - public void testAtomicUpdateRemovalOfDateField() throws Exception { - ensureFieldHasValues("1", "timestamps_mdt", DATE_1, DATE_2); - atomicRemoveValueFromField("1", "timestamps_mdt", DATE_1); - ensureFieldHasValues("1", "timestamps_mdt", DATE_2); - } - - private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception { - final SolrInputDocument doc = new SolrInputDocument(); - doc.setField("id", docId); - Map atomicUpdateRemoval = new HashMap<>(1); - atomicUpdateRemoval.put("remove", value); - doc.setField(fieldName, atomicUpdateRemoval); - - cluster.getSolrClient().add(COLLECTION, doc); - cluster.getSolrClient().commit(COLLECTION); - } - - private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception { - final SolrClient client = cluster.getSolrClient(); - - final QueryResponse response = client.query(COLLECTION, new SolrQuery("id:" + identifyingDocId)); - final SolrDocumentList docs = response.getResults(); - assertEquals(1, docs.getNumFound()); - final Collection valuesAfterUpdate = docs.get(0).getFieldValues(fieldName); - assertEquals(expectedValues.length, valuesAfterUpdate.size()); - for (Object expectedValue: expectedValues) { - assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue)); - } - } -} diff --git a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java index 5a15e10daec..54e9f9d0e53 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/AtomicUpdatesTest.java @@ -181,6 +181,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 { assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']"); assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']"); + + // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971) + doc = new SolrInputDocument(); + doc.setField("id", "4242"); + doc.setField("values_is", new String[] {"111", "222", "333"}); + assertU(adoc(doc)); + + doc = new SolrInputDocument(); + doc.setField("id", "4242"); + doc.setField("values_is", ImmutableMap.of("remove", 111)); + assertU(adoc(doc)); + assertU(commit()); + + assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']"); + assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']"); + assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']"); } @@ -251,6 +267,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 { assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']"); assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']"); + + // Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971) + doc = new SolrInputDocument(); + doc.setField("id", "4242"); + doc.setField("values_is", new Integer[] {111, 222, 333}); + assertU(adoc(doc)); + + doc = new SolrInputDocument(); + doc.setField("id", "4242"); + doc.setField("values_is", ImmutableMap.of("remove", 111)); + assertU(adoc(doc)); + assertU(commit()); + + assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']"); + assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']"); + assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']"); } @Test