diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 58c1bdea209..7d78943f88a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -313,6 +313,9 @@ New Features
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
information while still indexing term frequencies. (rmuir)
+* SOLR-2584: add UniqFieldsUpdateProcessor that removes duplicate values in the
+ specified fields. (Elmer Garduno, koji)
+
Optimizations
----------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
new file mode 100644
index 00000000000..5b139f00839
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
@@ -0,0 +1,108 @@
+package org.apache.solr.update.processor;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+
+/**
+ * A non-duplicate processor. Removes duplicates in the specified fields.
+ *
+ *
+ * <updateRequestProcessorChain name="uniq-fields">
+ * <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
+ * <lst name="fields">
+ * <str>uniq</str>
+ * <str>uniq2</str>
+ * <str>uniq3</str>
+ * </lst>
+ * </processor>
+ * <processor class="solr.RunUpdateProcessorFactory" />
+ * </updateRequestProcessorChain>
+ *
+ */
+public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
+
+ private Set fields;
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void init(@SuppressWarnings("rawtypes") NamedList args) {
+ NamedList flst = (NamedList)args.get("fields");
+ if(flst != null){
+ fields = new HashSet();
+ for(int i = 0; i < flst.size(); i++){
+ fields.add(flst.getVal(i));
+ }
+ }
+ }
+
+ @Override
+ public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+ SolrQueryResponse rsp,
+ UpdateRequestProcessor next) {
+ return new UniqFieldsUpdateProcessor(next, fields);
+ }
+
+ public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
+
+ private final Set fields;
+
+ public UniqFieldsUpdateProcessor(UpdateRequestProcessor next,
+ Set fields) {
+ super(next);
+ this.fields = fields;
+ }
+
+ @Override
+ public void processAdd(AddUpdateCommand cmd) throws IOException {
+ if(fields != null){
+ SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
+ List uniqList = new ArrayList();
+ for (String field : fields) {
+ uniqList.clear();
+ Collection col = solrInputDocument.getFieldValues(field);
+ if (col != null) {
+ for (Object o : col) {
+ if(!uniqList.contains(o))
+ uniqList.add(o);
+ }
+ solrInputDocument.remove(field);
+ for (Object o : uniqList) {
+ solrInputDocument.addField(field, o);
+ }
+ }
+ }
+ }
+ super.processAdd(cmd);
+ }
+ }
+}
+
+
+
diff --git a/solr/core/src/test-files/solr/conf/schema12.xml b/solr/core/src/test-files/solr/conf/schema12.xml
index c998a89f563..053a86ed997 100755
--- a/solr/core/src/test-files/solr/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/conf/schema12.xml
@@ -523,6 +523,13 @@
+
+
+
+
+
+
+
diff --git a/solr/core/src/test-files/solr/conf/solrconfig.xml b/solr/core/src/test-files/solr/conf/solrconfig.xml
index 9b40b1551a2..64b5a4216e4 100644
--- a/solr/core/src/test-files/solr/conf/solrconfig.xml
+++ b/solr/core/src/test-files/solr/conf/solrconfig.xml
@@ -491,5 +491,15 @@
+
+
+
+ uniq
+ uniq2
+ uniq3
+
+
+
+
diff --git a/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java
new file mode 100644
index 00000000000..4d3634eb54a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.MultiMapSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.UpdateParams;
+import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.XmlUpdateRequestHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequestBase;
+import org.apache.solr.response.SolrQueryResponse;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ */
+public class UniqFieldsUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig.xml", "schema12.xml");
+ }
+
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ clearIndex();
+ assertU(commit());
+ }
+
+ @Test
+ public void testUniqFields() throws Exception {
+ SolrCore core = h.getCore();
+ UpdateRequestProcessorChain chained = core
+ .getUpdateProcessingChain("uniq-fields");
+ UniqFieldsUpdateProcessorFactory factory = ((UniqFieldsUpdateProcessorFactory) chained
+ .getFactories()[0]);
+ assertNotNull(chained);
+
+ addDoc(adoc("id", "1a",
+ "uniq", "value1",
+ "uniq", "value1",
+ "uniq", "value2"));
+ addDoc(adoc("id", "2a",
+ "uniq2", "value1",
+ "uniq2", "value2",
+ "uniq2", "value1",
+ "uniq2", "value3",
+ "uniq", "value1",
+ "uniq", "value1"));
+ addDoc(adoc("id", "1b",
+ "uniq3", "value1",
+ "uniq3", "value1"));
+ addDoc(adoc("id", "1c",
+ "nouniq", "value1",
+ "nouniq", "value1",
+ "nouniq", "value2"));
+ addDoc(adoc("id", "2c",
+ "nouniq", "value1",
+ "nouniq", "value1",
+ "nouniq", "value2",
+ "uniq2", "value1",
+ "uniq2", "value1"));
+
+ assertU(commit());
+ assertQ(req("id:1a"), "count(//*[@name='uniq']/*)=2",
+ "//arr[@name='uniq']/str[1][.='value1']",
+ "//arr[@name='uniq']/str[2][.='value2']");
+ assertQ(req("id:2a"), "count(//*[@name='uniq2']/*)=3",
+ "//arr[@name='uniq2']/str[1][.='value1']",
+ "//arr[@name='uniq2']/str[2][.='value2']",
+ "//arr[@name='uniq2']/str[3][.='value3']");
+ assertQ(req("id:2a"), "count(//*[@name='uniq']/*)=1");
+ assertQ(req("id:1b"), "count(//*[@name='uniq3'])=1");
+ assertQ(req("id:1c"), "count(//*[@name='nouniq']/*)=3");
+ assertQ(req("id:2c"), "count(//*[@name='nouniq']/*)=3");
+ assertQ(req("id:2c"), "count(//*[@name='uniq2']/*)=1");
+
+ }
+
+ private void addDoc(String doc) throws Exception {
+ Map params = new HashMap();
+ MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
+ params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uniq-fields" });
+ SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
+ (SolrParams) mmparams) {
+ };
+
+ XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
+ handler.init(null);
+ ArrayList streams = new ArrayList(2);
+ streams.add(new ContentStreamBase.StringStream(doc));
+ req.setContentStreams(streams);
+ handler.handleRequestBody(req, new SolrQueryResponse());
+ req.close();
+ }
+}