From 83e07fec099c12e31f0fe7abe8b77cd3839085dd Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 18 Jul 2012 16:52:42 +0000 Subject: [PATCH] SOLR-3215: Clone SolrInputDocument when distrib indexing so that update processors after the distrib update process do not process the document twice. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1363013 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../processor/DistributedUpdateProcessor.java | 9 ++- .../org/apache/solr/TestDocumentBuilder.java | 71 +++++++++++++++++++ .../apache/solr/common/SolrInputDocument.java | 11 ++- .../apache/solr/common/SolrInputField.java | 15 ++++ 5 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/TestDocumentBuilder.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d9e9149e97f..e1cbdffaf95 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -117,6 +117,9 @@ Other Changes * SOLR-3600: Raise the default zkClientTimeout from 10 seconds to 15 seconds. (Mark Miller) +* SOLR-3215: Clone SolrInputDocument when distrib indexing so that update processors after + the distrib update process do not process the document twice. (Mark Miller) + ================== 4.0.0-ALPHA ================== More information about this release, including any errata related to the release notes, upgrade instructions, or other changes may be found online at: diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java index 426d580ca05..f7323c9f5ff 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java @@ -285,7 +285,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { boolean dropCmd = false; if (!forwardToLeader) { - dropCmd = versionAdd(cmd); + // clone the original doc + SolrInputDocument clonedDoc = cmd.solrDoc.deepCopy(); + dropCmd = versionAdd(cmd, clonedDoc); + cmd.solrDoc = clonedDoc; } if (dropCmd) { @@ -393,10 +396,11 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { /** * @param cmd + * @param cloneDoc needs the version if it's assigned * @return whether or not to drop this cmd * @throws IOException */ - private boolean versionAdd(AddUpdateCommand cmd) throws IOException { + private boolean versionAdd(AddUpdateCommand cmd, SolrInputDocument cloneDoc) throws IOException { BytesRef idBytes = cmd.getIndexedId(); if (vinfo == null || idBytes == null) { @@ -469,6 +473,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor { long version = vinfo.getNewClock(); cmd.setVersion(version); cmd.getSolrInputDocument().setField(VersionInfo.VERSION_FIELD, version); + cloneDoc.setField(VersionInfo.VERSION_FIELD, version); bucket.updateHighest(version); } else { // The leader forwarded us this update. diff --git a/solr/core/src/test/org/apache/solr/TestDocumentBuilder.java b/solr/core/src/test/org/apache/solr/TestDocumentBuilder.java new file mode 100644 index 00000000000..e8b5a3f22b2 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/TestDocumentBuilder.java @@ -0,0 +1,71 @@ +package org.apache.solr; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.junit.Test; + + +public class TestDocumentBuilder extends LuceneTestCase { + + @Test + public void testDeepCopy() throws IOException { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("field1", "value1"); + doc.addField("field2", "value1"); + doc.addField("field3", "value2"); + doc.addField("field4", 15); + List list = new ArrayList(); + list.add(45); + list.add(33); + list.add(20); + doc.addField("field5", list); + doc.setDocumentBoost(5f); + + SolrInputDocument clone = doc.deepCopy(); + + System.out.println("doc1: "+ doc); + System.out.println("clone: "+ clone); + + assertNotSame(doc, clone); + + Collection fieldNames = doc.getFieldNames(); + for (String name : fieldNames) { + Collection values = doc.getFieldValues(name); + Collection cloneValues = clone.getFieldValues(name); + + assertEquals(values.size(), cloneValues.size()); + assertNotSame(values, cloneValues); + + Iterator cloneIt = cloneValues.iterator(); + for (Object value : values) { + Object cloneValue = cloneIt.next(); + assertSame(value, cloneValue); + } + } + } + +} diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java index bd096087db8..660802fe5a8 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java @@ -18,10 +18,10 @@ package org.apache.solr.common; import java.io.Serializable; +import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; -import java.util.Collection; import java.util.Set; /** @@ -182,6 +182,15 @@ public class SolrInputDocument implements Map, Iterable> entries = _fields.entrySet(); + for (Map.Entry fieldEntry : entries) { + clone._fields.put(fieldEntry.getKey(), fieldEntry.getValue().deepCopy()); + } + clone._documentBoost = _documentBoost; + return clone; + } //--------------------------------------------------- // MAP interface diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java index 48a346bf62f..18f06764d98 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java @@ -199,4 +199,19 @@ public class SolrInputField implements Iterable, Serializable { return name + ((boost == 1.0) ? "=" : ("("+boost+")=")) + value; } + + public SolrInputField deepCopy() { + SolrInputField clone = new SolrInputField(name); + clone.boost = boost; + // We can't clone here, so we rely on simple primitives + if (value instanceof Collection) { + Collection values = (Collection) value; + Collection cloneValues = new ArrayList(values.size()); + cloneValues.addAll(values); + clone.value = cloneValues; + } else { + clone.value = value; + } + return clone; + } }