SOLR-14971: Handle atomic-removes on uncommitted docs (#2056)

Docs fetched from the update log via RTG look different than docs
fetched from commits in the index: the types of
field-values may be different between the two, etc.

This is a problem for atomic add/remove of field values, where matching
existing values has historically been done by object equals() calls (via
Collection operations).  This relies on equality checks which don't have
flexible enough semantics to match values across these different types.
(For example, `new Long(1).equals(new Integer(1))` returns `false`).
This was causing some add-distinct and remove operations on
uncommitted values to silently fail to remove field values.

This commit patches over this by converting between types in the more
common cases before using the fallback behavior.
This commit is contained in:
Jason Gerlowski 2020-11-11 12:28:11 -05:00 committed by GitHub
parent 91ee53d418
commit a7197ac0ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 495 additions and 162 deletions

View File

@ -192,6 +192,8 @@ Bug Fixes
* SOLR-14961: Fix for deleting zookeeper nodes with same path length. Only the first zk-node was removed. (Michael Aleythe via Mike Drob) * SOLR-14961: Fix for deleting zookeeper nodes with same path length. Only the first zk-node was removed. (Michael Aleythe via Mike Drob)
* SOLR-14971: AtomicUpdate 'remove', 'add-distinct' operations now works on numeric, date fields in uncommitted docs (Jason Gerlowski)
Other Changes Other Changes
--------------------- ---------------------

View File

@ -16,21 +16,6 @@
*/ */
package org.apache.solr.update.processor; package org.apache.solr.update.processor;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -53,10 +38,20 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.NumericValueFieldType; import org.apache.solr.schema.NumericValueFieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import static org.apache.solr.common.params.CommonParams.ID; import static org.apache.solr.common.params.CommonParams.ID;
/** /**
@ -450,9 +445,6 @@ public class AtomicUpdateDocumentMerger {
final String name = sif.getName(); final String name = sif.getName();
SolrInputField existingField = toDoc.get(name); SolrInputField existingField = toDoc.get(name);
// throws exception if field doesn't exist
SchemaField sf = schema.getField(name);
Collection<Object> original = existingField != null ? Collection<Object> original = existingField != null ?
existingField.getValues() : existingField.getValues() :
new ArrayList<>(); new ArrayList<>();
@ -460,16 +452,10 @@ public class AtomicUpdateDocumentMerger {
int initialSize = original.size(); int initialSize = original.size();
if (fieldVal instanceof Collection) { if (fieldVal instanceof Collection) {
for (Object object : (Collection) fieldVal) { for (Object object : (Collection) fieldVal) {
Object obj = sf.getType().toNativeType(object); addValueIfDistinct(name, original, object);
if (!original.contains(obj)) {
original.add(obj);
}
} }
} else { } else {
Object object = sf.getType().toNativeType(fieldVal); addValueIfDistinct(name, original, fieldVal);
if (!original.contains(object)) {
original.add(object);
}
} }
if (original.size() > initialSize) { // update only if more are added if (original.size() > initialSize) { // update only if more are added
@ -516,7 +502,7 @@ public class AtomicUpdateDocumentMerger {
SolrInputField existingField = toDoc.get(name); SolrInputField existingField = toDoc.get(name);
if (existingField == null) return; if (existingField == null) return;
@SuppressWarnings({"rawtypes"}) @SuppressWarnings({"rawtypes"})
final Collection original = existingField.getValues(); final Collection<Object> original = existingField.getValues();
if (fieldVal instanceof Collection) { if (fieldVal instanceof Collection) {
for (Object object : (Collection) fieldVal) { for (Object object : (Collection) fieldVal) {
removeObj(original, object, name); removeObj(original, object, name);
@ -582,11 +568,11 @@ public class AtomicUpdateDocumentMerger {
return objValues.iterator().next() instanceof SolrDocumentBase; return objValues.iterator().next() instanceof SolrDocumentBase;
} }
private void removeObj(@SuppressWarnings({"rawtypes"})Collection original, Object toRemove, String fieldName) { private void removeObj(Collection<Object> original, Object toRemove, String fieldName) {
if(isChildDoc(toRemove)) { if(isChildDoc(toRemove)) {
removeChildDoc(original, (SolrInputDocument) toRemove); removeChildDoc(original, (SolrInputDocument) toRemove);
} else { } else {
original.remove(getNativeFieldValue(fieldName, toRemove)); removeFieldValueWithNumericFudging(fieldName, original, toRemove);
} }
} }
@ -600,6 +586,81 @@ public class AtomicUpdateDocumentMerger {
} }
} }
private void removeFieldValueWithNumericFudging(String fieldName, @SuppressWarnings({"rawtypes"}) Collection<Object> original, Object toRemove) {
if (original.size() == 0) {
return;
}
final BiConsumer<Collection<Object>, Object> removePredicate = (coll, existingElement) -> coll.remove(existingElement);
modifyCollectionBasedOnFuzzyPresence(fieldName, original, toRemove, removePredicate, null);
}
private void addValueIfDistinct(String fieldName, Collection<Object> original, Object toAdd) {
final BiConsumer<Collection<Object>, Object> addPredicate = (coll, newElement) -> coll.add(newElement);
modifyCollectionBasedOnFuzzyPresence(fieldName, original, toAdd, null, addPredicate);
}
/**
* Modifies a collection based on the (loosely-judged) presence or absence of a specific value
*
* Several classes of atomic update (notably 'remove' and 'add-distinct') rely on being able to identify whether an
* item is already present in a given list of values. Unfortunately the 'item' being checked for may be of different
* types based on the format of the user request and on where the existing document was pulled from (tlog vs index).
* As a result atomic updates needs a "fuzzy" way of checking presence and equality that is more flexible than
* traditional equality checks allow. This method does light type-checking to catch some of these more common cases
* (Long compared against Integers, String compared against Date, etc.), and calls the provided lambda to modify the
* field values as necessary.
*
* @param fieldName the field name involved in this atomic update operation
* @param original the list of values currently present in the existing document
* @param rawValue a value to be checked for in 'original'
* @param ifPresent a function to execute if rawValue was found in 'original'
* @param ifAbsent a function to execute if rawValue was not found in 'original'
*/
private void modifyCollectionBasedOnFuzzyPresence(String fieldName, Collection<Object> original, Object rawValue,
BiConsumer<Collection<Object>, Object> ifPresent,
BiConsumer<Collection<Object>, Object> ifAbsent) {
Object nativeValue = getNativeFieldValue(fieldName, rawValue);
Optional<Object> matchingValue = findObjectWithTypeFuzziness(original, rawValue, nativeValue);
if (matchingValue.isPresent() && ifPresent != null) {
ifPresent.accept(original, matchingValue.get());
} else if(matchingValue.isEmpty() && ifAbsent != null) {
ifAbsent.accept(original, rawValue);
}
}
private Optional<Object> findObjectWithTypeFuzziness(Collection<Object> original, Object rawValue, Object nativeValue) {
if (nativeValue instanceof Double || nativeValue instanceof Float) {
final Number nativeAsNumber = (Number) nativeValue;
return original.stream().filter(val ->
val.equals(rawValue) ||
val.equals(nativeValue) ||
(val instanceof Number && ((Number) val).doubleValue() == nativeAsNumber.doubleValue()) ||
(val instanceof String && val.equals(nativeAsNumber.toString())))
.findFirst();
} else if (nativeValue instanceof Long || nativeValue instanceof Integer) {
final Number nativeAsNumber = (Number) nativeValue;
return original.stream().filter(val ->
val.equals(rawValue) ||
val.equals(nativeValue) ||
(val instanceof Number && ((Number) val).longValue() == nativeAsNumber.longValue()) ||
(val instanceof String && val.equals(nativeAsNumber.toString())))
.findFirst();
} else if (nativeValue instanceof Date) {
return original.stream().filter(val ->
val.equals(rawValue) ||
val.equals(nativeValue) ||
(val instanceof String && DateMathParser.parseMath(null, (String)val).equals(nativeValue)))
.findFirst();
} else if (original.contains(nativeValue)) {
return Optional.of(nativeValue);
} else if (original.contains(rawValue)) {
return Optional.of(rawValue);
} else {
return Optional.empty();
}
}
/** /**
* *
* @param doc document to search for * @param doc document to search for

View File

@ -0,0 +1,370 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.time.Instant;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
/**
* Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
*
* {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent
* changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
* after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example.
*/
public class AtomicUpdateJavabinTest extends SolrCloudTestCase {
private static final String COMMITTED_DOC_ID = "1";
private static final String COMMITTED_DOC_STR_VALUES_ID = "1s";
private static final String UNCOMMITTED_DOC_ID = "2";
private static final String UNCOMMITTED_DOC_STR_VALUES_ID = "2s";
private static final String COLLECTION = "collection1";
private static final int NUM_SHARDS = 1;
private static final int NUM_REPLICAS = 1;
private static final Date DATE_1 = Date.from(Instant.ofEpochSecond(1554243309));
private static final String DATE_1_STR = "2019-04-02T22:15:09Z";
private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243609));
private static final String DATE_2_STR = "2019-04-02T22:20:09Z";
private static final Date DATE_3 = Date.from(Instant.ofEpochSecond(1554243909));
private static final String DATE_3_STR = "2019-04-02T22:25:09Z";
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(1)
.addConfig("conf", configset("cloud-dynamic"))
.configure();
CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
.process(cluster.getSolrClient());
cluster.waitForActiveCollection(COLLECTION, 1, 1);
}
@Before
public void setUp() throws Exception {
super.setUp();
final SolrInputDocument committedDoc = sdoc(
"id", COMMITTED_DOC_ID,
"title_s", "title_1", "title_s", "title_2",
"tv_mv_text", "text_1", "tv_mv_text", "text_2",
"count_is", 1, "count_is", 2,
"count_md", 1.0, "count_md", 2.0,
"timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
final SolrInputDocument committedStrDoc = sdoc(
"id", COMMITTED_DOC_STR_VALUES_ID,
"title_s", "title_1", "title_s", "title_2",
"tv_mv_text", "text_1", "tv_mv_text", "text_2",
"count_is", "1", "count_is", "2",
"count_md", "1.0", "count_md", "2.0",
"timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
final UpdateRequest committedRequest = new UpdateRequest()
.add(committedDoc)
.add(committedStrDoc);
committedRequest.commit(cluster.getSolrClient(), COLLECTION);
// Upload a copy of id:1 that's uncommitted to test how atomic-updates modify values in the tlog
// See SOLR-14971 for an example of why this case needs tested separately
final SolrInputDocument uncommittedDoc = sdoc(
"id", UNCOMMITTED_DOC_ID,
"title_s", "title_1", "title_s", "title_2",
"tv_mv_text", "text_1", "tv_mv_text", "text_2",
"count_is", 1, "count_is", 2,
"count_md", 1.0, "count_md", 2.0,
"timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
final SolrInputDocument uncommittedStrDoc = sdoc(
"id", UNCOMMITTED_DOC_STR_VALUES_ID,
"title_s", "title_1", "title_s", "title_2",
"tv_mv_text", "text_1", "tv_mv_text", "text_2",
"count_is", "1", "count_is", "2",
"count_md", "1.0", "count_md", "2.0",
"timestamps_mdt", DATE_1_STR, "timestamps_mdt", DATE_2_STR);
final UpdateRequest uncommittedRequest = new UpdateRequest()
.add(uncommittedDoc)
.add(uncommittedStrDoc);
uncommittedRequest.process(cluster.getSolrClient(), COLLECTION);
}
@Test
public void testAtomicUpdateRemovalOfStrField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicRemoveValueFromField(COMMITTED_DOC_ID, "title_s", "title_1");
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "title_s", "title_1");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_2");
}
@Test
public void testAtomicUpdateRemovalOfTextField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicRemoveValueFromField(COMMITTED_DOC_ID, "tv_mv_text", "text_1");
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
}
@Test
public void testAtomicUpdateRemovalOfIntField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_is", 1);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_is", 1);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 2);
}
@Test
public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicRemoveValueFromField(COMMITTED_DOC_ID, "count_md", 1.0);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 2.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "count_md", 1.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 2.0);
}
@Test
public void testAtomicUpdateRemovalOfDateField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicRemoveValueFromField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicRemoveValueFromField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
}
@Test
public void testAtomicUpdateAddDistinctOfDistinctValueOnStrField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_3");
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_3");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2", "title_3");
}
@Test
public void testAtomicUpdateAddDistinctOfDuplicateValueOnStrField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "title_s", "title_2");
ensureFieldHasValues(COMMITTED_DOC_ID, "title_s", "title_1", "title_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "title_s", "title_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "title_s", "title_1", "title_2");
}
@Test
public void testAtomicUpdateAddDistinctOfDistinctValueOnTextField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_3");
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_3");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2", "text_3");
}
@Test
public void testAtomicUpdateAddDistinctOfDuplicateValueOnTextField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "tv_mv_text", "text_2");
ensureFieldHasValues(COMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_2");
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "tv_mv_text", "text_1", "text_2");
}
@Test
public void testAtomicUpdateAddDistinctOfDistinctValueOnIntField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 3);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2, 3);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 3);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2, 3);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 3);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2, 3);
}
@Test
public void testAtomicUpdateAddDistinctOfDuplicateValueOnIntField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_is", 2);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_is", 1, 2);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_is", 2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_is", 1, 2);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 2);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_is", 1, 2);
}
@Test
public void testAtomicUpdateAddDistinctOfDistinctValueOnDoubleField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 3.0);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 3.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0, 3.0);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 3.0);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0, 3.0);
}
@Test
public void testAtomicUpdateAddDistinctOfDuplicateValueOnDoubleField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "count_md", 2.0);
ensureFieldHasValues(COMMITTED_DOC_ID, "count_md", 1.0, 2.0);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "count_md", 2.0);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "count_md", 1.0, 2.0);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 2.0);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "count_md", 1.0, 2.0);
}
@Test
public void testAtomicUpdateAddDistinctOfDistinctValueOnDateField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_3);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_3);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2, DATE_3);
}
@Test
public void testAtomicUpdateAddDistinctOfDuplicateValueOnDateField() throws Exception {
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(COMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
ensureFieldHasValues(COMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
ensureFieldHasValues(COMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_2);
ensureFieldHasValues(UNCOMMITTED_DOC_ID, "timestamps_mdt", DATE_1, DATE_2);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
atomicAddDistinctValueToField(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_2);
ensureFieldHasValues(UNCOMMITTED_DOC_STR_VALUES_ID, "timestamps_mdt", DATE_1, DATE_2);
}
private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
final SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", docId);
Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
atomicUpdateRemoval.put("remove", value);
doc.setField(fieldName, atomicUpdateRemoval);
cluster.getSolrClient().add(COLLECTION, doc);
}
private void atomicAddDistinctValueToField(String docId, String fieldName, Object value) throws Exception {
final SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", docId);
Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
atomicUpdateRemoval.put("add-distinct", value);
doc.setField(fieldName, atomicUpdateRemoval);
cluster.getSolrClient().add(COLLECTION, doc);
}
private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
final ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set("id", identifyingDocId);
QueryRequest request = new QueryRequest(solrParams);
request.setPath("/get");
final QueryResponse response = request.process(cluster.getSolrClient(), COLLECTION);
final NamedList<Object> rawResponse = response.getResponse();
assertTrue(rawResponse.get("doc") != null);
assertTrue(rawResponse.get("doc") instanceof SolrDocument);
final SolrDocument doc = (SolrDocument) rawResponse.get("doc");
final Collection<Object> valuesAfterUpdate = doc.getFieldValues(fieldName);
assertEquals("Expected field to have " + expectedValues.length + " values, but found " + valuesAfterUpdate.size(),
expectedValues.length, valuesAfterUpdate.size());
for (Object expectedValue: expectedValues) {
assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
}
}
}

View File

@ -1,132 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.time.Instant;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Tests Solr's atomic-update functionality using requests sent through SolrJ using wt=javabin
*
* {@link AtomicUpdatesTest} covers some of the same functionality, but does so by making xml-based requests. Recent
* changes to Solr have made it possible for the same data sent with different formats to result in different NamedLists
* after unmarshalling, so the test duplication is now necessary. See SOLR-13331 for an example.
*/
public class AtomicUpdateRemovalJavabinTest extends SolrCloudTestCase {
private static final String COLLECTION = "collection1";
private static final int NUM_SHARDS = 1;
private static final int NUM_REPLICAS = 1;
private static final Date DATE_1 = new Date();
private static final Date DATE_2 = Date.from(Instant.ofEpochSecond(1554243909));
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(1)
.addConfig("conf", configset("cloud-dynamic"))
.configure();
CollectionAdminRequest.createCollection(COLLECTION, "conf", NUM_SHARDS, NUM_REPLICAS)
.process(cluster.getSolrClient());
cluster.waitForActiveCollection(COLLECTION, 1, 1);
final SolrInputDocument doc1 = sdoc(
"id", "1",
"title_s", "title_1", "title_s", "title_2",
"tv_mv_text", "text_1", "tv_mv_text", "text_2",
"count_is", 1, "count_is", 2,
"count_md", 1.0, "count_md", 2.0,
"timestamps_mdt", DATE_1, "timestamps_mdt", DATE_2);
final UpdateRequest req = new UpdateRequest()
.add(doc1);
req.commit(cluster.getSolrClient(), COLLECTION);
}
@Test
public void testAtomicUpdateRemovalOfStrField() throws Exception {
ensureFieldHasValues("1", "title_s", "title_1", "title_2");
atomicRemoveValueFromField("1", "title_s", "title_1");
ensureFieldHasValues("1", "title_s", "title_2");
}
@Test
public void testAtomicUpdateRemovalOfTextField() throws Exception {
ensureFieldHasValues("1", "tv_mv_text", "text_1", "text_2");
atomicRemoveValueFromField("1", "tv_mv_text", "text_1");
ensureFieldHasValues("1", "tv_mv_text", "text_2");
}
@Test
public void testAtomicUpdateRemovalOfIntField() throws Exception {
ensureFieldHasValues("1", "count_is", 1, 2);
atomicRemoveValueFromField("1", "count_is", 1);
ensureFieldHasValues("1", "count_is", 2);
}
@Test
public void testAtomicUpdateRemovalOfDoubleField() throws Exception {
ensureFieldHasValues("1", "count_md", 1.0, 2.0);
atomicRemoveValueFromField("1", "count_md", 1.0);
ensureFieldHasValues("1", "count_md", 2.0);
}
@Test
public void testAtomicUpdateRemovalOfDateField() throws Exception {
ensureFieldHasValues("1", "timestamps_mdt", DATE_1, DATE_2);
atomicRemoveValueFromField("1", "timestamps_mdt", DATE_1);
ensureFieldHasValues("1", "timestamps_mdt", DATE_2);
}
private void atomicRemoveValueFromField(String docId, String fieldName, Object value) throws Exception {
final SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", docId);
Map<String, Object> atomicUpdateRemoval = new HashMap<>(1);
atomicUpdateRemoval.put("remove", value);
doc.setField(fieldName, atomicUpdateRemoval);
cluster.getSolrClient().add(COLLECTION, doc);
cluster.getSolrClient().commit(COLLECTION);
}
private void ensureFieldHasValues(String identifyingDocId, String fieldName, Object... expectedValues) throws Exception {
final SolrClient client = cluster.getSolrClient();
final QueryResponse response = client.query(COLLECTION, new SolrQuery("id:" + identifyingDocId));
final SolrDocumentList docs = response.getResults();
assertEquals(1, docs.getNumFound());
final Collection<Object> valuesAfterUpdate = docs.get(0).getFieldValues(fieldName);
assertEquals(expectedValues.length, valuesAfterUpdate.size());
for (Object expectedValue: expectedValues) {
assertTrue("Expected value [" + expectedValue + "] was not found in field", valuesAfterUpdate.contains(expectedValue));
}
}
}

View File

@ -181,6 +181,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']"); assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']"); assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
// Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
doc = new SolrInputDocument();
doc.setField("id", "4242");
doc.setField("values_is", new String[] {"111", "222", "333"});
assertU(adoc(doc));
doc = new SolrInputDocument();
doc.setField("id", "4242");
doc.setField("values_is", ImmutableMap.of("remove", 111));
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
} }
@ -251,6 +267,22 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']"); assertQ(req("q", "intRemove:[* TO *]", "indent", "true"), "//result[@numFound = '4']");
assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']"); assertQ(req("q", "intRemove:111", "indent", "true"), "//result[@numFound = '3']");
// Test that mv int fields can have values removed prior to being committed to index (see SOLR-14971)
doc = new SolrInputDocument();
doc.setField("id", "4242");
doc.setField("values_is", new Integer[] {111, 222, 333});
assertU(adoc(doc));
doc = new SolrInputDocument();
doc.setField("id", "4242");
doc.setField("values_is", ImmutableMap.of("remove", 111));
assertU(adoc(doc));
assertU(commit());
assertQ(req("q", "values_is:111", "indent", "true"), "//result[@numFound = '0']");
assertQ(req("q", "values_is:222", "indent", "true"), "//result[@numFound = '1']");
assertQ(req("q", "values_is:333", "indent", "true"), "//result[@numFound = '1']");
} }
@Test @Test