From 1464513f349950424861d173db447117b2def2d3 Mon Sep 17 00:00:00 2001 From: Koji Sekiguchi Date: Thu, 21 Jul 2011 07:09:27 +0000 Subject: [PATCH] SOLR-2584: add UniqFieldsUpdateProcessor git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1149050 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../UniqFieldsUpdateProcessorFactory.java | 108 +++++++++++++++ .../src/test-files/solr/conf/schema12.xml | 7 + .../src/test-files/solr/conf/solrconfig.xml | 10 ++ .../UniqFieldsUpdateProcessorFactoryTest.java | 123 ++++++++++++++++++ 5 files changed, 251 insertions(+) create mode 100644 solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java create mode 100644 solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 58c1bdea209..7d78943f88a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -313,6 +313,9 @@ New Features * LUCENE-2048: Added omitPositions to the schema, so you can omit position information while still indexing term frequencies. (rmuir) +* SOLR-2584: add UniqFieldsUpdateProcessor that removes duplicate values in the + specified fields. (Elmer Garduno, koji) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java new file mode 100644 index 00000000000..5b139f00839 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java @@ -0,0 +1,108 @@ +package org.apache.solr.update.processor; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; + +/** + * A non-duplicate processor. Removes duplicates in the specified fields. + * + *
+ * <updateRequestProcessorChain name="uniq-fields">
+ *   <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
+ *     <lst name="fields">
+ *       <str>uniq</str>
+ *       <str>uniq2</str>
+ *       <str>uniq3</str>
+ *     </lst>      
+ *   </processor>
+ *   <processor class="solr.RunUpdateProcessorFactory" />
+ * </updateRequestProcessorChain>
+ * + */ +public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory { + + private Set fields; + + @SuppressWarnings("unchecked") + @Override + public void init(@SuppressWarnings("rawtypes") NamedList args) { + NamedList flst = (NamedList)args.get("fields"); + if(flst != null){ + fields = new HashSet(); + for(int i = 0; i < flst.size(); i++){ + fields.add(flst.getVal(i)); + } + } + } + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new UniqFieldsUpdateProcessor(next, fields); + } + + public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor { + + private final Set fields; + + public UniqFieldsUpdateProcessor(UpdateRequestProcessor next, + Set fields) { + super(next); + this.fields = fields; + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + if(fields != null){ + SolrInputDocument solrInputDocument = cmd.getSolrInputDocument(); + List uniqList = new ArrayList(); + for (String field : fields) { + uniqList.clear(); + Collection col = solrInputDocument.getFieldValues(field); + if (col != null) { + for (Object o : col) { + if(!uniqList.contains(o)) + uniqList.add(o); + } + solrInputDocument.remove(field); + for (Object o : uniqList) { + solrInputDocument.addField(field, o); + } + } + } + } + super.processAdd(cmd); + } + } +} + + + diff --git a/solr/core/src/test-files/solr/conf/schema12.xml b/solr/core/src/test-files/solr/conf/schema12.xml index c998a89f563..053a86ed997 100755 --- a/solr/core/src/test-files/solr/conf/schema12.xml +++ b/solr/core/src/test-files/solr/conf/schema12.xml @@ -523,6 +523,13 @@ + + + + + + + diff --git a/solr/core/src/test-files/solr/conf/solrconfig.xml b/solr/core/src/test-files/solr/conf/solrconfig.xml index 9b40b1551a2..64b5a4216e4 100644 --- a/solr/core/src/test-files/solr/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/conf/solrconfig.xml @@ -491,5 +491,15 @@ + + + + uniq + uniq2 + uniq3 + + + + diff --git a/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java new file mode 100644 index 00000000000..4d3634eb54a --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactoryTest.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.UpdateParams; +import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.ContentStreamBase; +import org.apache.solr.core.SolrCore; +import org.apache.solr.handler.XmlUpdateRequestHandler; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.response.SolrQueryResponse; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + */ +public class UniqFieldsUpdateProcessorFactoryTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema12.xml"); + } + + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + clearIndex(); + assertU(commit()); + } + + @Test + public void testUniqFields() throws Exception { + SolrCore core = h.getCore(); + UpdateRequestProcessorChain chained = core + .getUpdateProcessingChain("uniq-fields"); + UniqFieldsUpdateProcessorFactory factory = ((UniqFieldsUpdateProcessorFactory) chained + .getFactories()[0]); + assertNotNull(chained); + + addDoc(adoc("id", "1a", + "uniq", "value1", + "uniq", "value1", + "uniq", "value2")); + addDoc(adoc("id", "2a", + "uniq2", "value1", + "uniq2", "value2", + "uniq2", "value1", + "uniq2", "value3", + "uniq", "value1", + "uniq", "value1")); + addDoc(adoc("id", "1b", + "uniq3", "value1", + "uniq3", "value1")); + addDoc(adoc("id", "1c", + "nouniq", "value1", + "nouniq", "value1", + "nouniq", "value2")); + addDoc(adoc("id", "2c", + "nouniq", "value1", + "nouniq", "value1", + "nouniq", "value2", + "uniq2", "value1", + "uniq2", "value1")); + + assertU(commit()); + assertQ(req("id:1a"), "count(//*[@name='uniq']/*)=2", + "//arr[@name='uniq']/str[1][.='value1']", + "//arr[@name='uniq']/str[2][.='value2']"); + assertQ(req("id:2a"), "count(//*[@name='uniq2']/*)=3", + "//arr[@name='uniq2']/str[1][.='value1']", + "//arr[@name='uniq2']/str[2][.='value2']", + "//arr[@name='uniq2']/str[3][.='value3']"); + assertQ(req("id:2a"), "count(//*[@name='uniq']/*)=1"); + assertQ(req("id:1b"), "count(//*[@name='uniq3'])=1"); + assertQ(req("id:1c"), "count(//*[@name='nouniq']/*)=3"); + assertQ(req("id:2c"), "count(//*[@name='nouniq']/*)=3"); + assertQ(req("id:2c"), "count(//*[@name='uniq2']/*)=1"); + + } + + private void addDoc(String doc) throws Exception { + Map params = new HashMap(); + MultiMapSolrParams mmparams = new MultiMapSolrParams(params); + params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uniq-fields" }); + SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), + (SolrParams) mmparams) { + }; + + XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler(); + handler.init(null); + ArrayList streams = new ArrayList(2); + streams.add(new ContentStreamBase.StringStream(doc)); + req.setContentStreams(streams); + handler.handleRequestBody(req, new SolrQueryResponse()); + req.close(); + } +}