mirror of https://github.com/apache/lucene.git
SOLR-2584: add UniqFieldsUpdateProcessor
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1149050 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
857ec84c15
commit
1464513f34
|
@ -313,6 +313,9 @@ New Features
|
|||
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
|
||||
information while still indexing term frequencies. (rmuir)
|
||||
|
||||
* SOLR-2584: add UniqFieldsUpdateProcessor that removes duplicate values in the
|
||||
specified fields. (Elmer Garduno, koji)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
package org.apache.solr.update.processor;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
|
||||
/**
|
||||
* A non-duplicate processor. Removes duplicates in the specified fields.
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <updateRequestProcessorChain name="uniq-fields">
|
||||
* <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
|
||||
* <lst name="fields">
|
||||
* <str>uniq</str>
|
||||
* <str>uniq2</str>
|
||||
* <str>uniq3</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* <processor class="solr.RunUpdateProcessorFactory" />
|
||||
* </updateRequestProcessorChain></pre>
|
||||
*
|
||||
*/
|
||||
public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
|
||||
|
||||
private Set<String> fields;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(@SuppressWarnings("rawtypes") NamedList args) {
|
||||
NamedList<String> flst = (NamedList<String>)args.get("fields");
|
||||
if(flst != null){
|
||||
fields = new HashSet<String>();
|
||||
for(int i = 0; i < flst.size(); i++){
|
||||
fields.add(flst.getVal(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new UniqFieldsUpdateProcessor(next, fields);
|
||||
}
|
||||
|
||||
public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
|
||||
|
||||
private final Set<String> fields;
|
||||
|
||||
public UniqFieldsUpdateProcessor(UpdateRequestProcessor next,
|
||||
Set<String> fields) {
|
||||
super(next);
|
||||
this.fields = fields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
if(fields != null){
|
||||
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||
List<Object> uniqList = new ArrayList<Object>();
|
||||
for (String field : fields) {
|
||||
uniqList.clear();
|
||||
Collection<Object> col = solrInputDocument.getFieldValues(field);
|
||||
if (col != null) {
|
||||
for (Object o : col) {
|
||||
if(!uniqList.contains(o))
|
||||
uniqList.add(o);
|
||||
}
|
||||
solrInputDocument.remove(field);
|
||||
for (Object o : uniqList) {
|
||||
solrInputDocument.addField(field, o);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -523,6 +523,13 @@
|
|||
<field name="pointD" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="point_hash" type="geohash" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="store" type="location" indexed="true" stored="true"/>
|
||||
|
||||
<!-- to test uniq fields -->
|
||||
<field name="uniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="uniq2" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="uniq3" type="string" indexed="true" stored="true"/>
|
||||
<field name="nouniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
|
||||
|
||||
|
|
|
@ -491,5 +491,15 @@
|
|||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="uniq-fields">
|
||||
<processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
|
||||
<lst name="fields">
|
||||
<str>uniq</str>
|
||||
<str>uniq2</str>
|
||||
<str>uniq3</str>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
</config>
|
||||
|
|
|
@ -0,0 +1,123 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.XmlUpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class UniqFieldsUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema12.xml");
|
||||
}
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
clearIndex();
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUniqFields() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain chained = core
|
||||
.getUpdateProcessingChain("uniq-fields");
|
||||
UniqFieldsUpdateProcessorFactory factory = ((UniqFieldsUpdateProcessorFactory) chained
|
||||
.getFactories()[0]);
|
||||
assertNotNull(chained);
|
||||
|
||||
addDoc(adoc("id", "1a",
|
||||
"uniq", "value1",
|
||||
"uniq", "value1",
|
||||
"uniq", "value2"));
|
||||
addDoc(adoc("id", "2a",
|
||||
"uniq2", "value1",
|
||||
"uniq2", "value2",
|
||||
"uniq2", "value1",
|
||||
"uniq2", "value3",
|
||||
"uniq", "value1",
|
||||
"uniq", "value1"));
|
||||
addDoc(adoc("id", "1b",
|
||||
"uniq3", "value1",
|
||||
"uniq3", "value1"));
|
||||
addDoc(adoc("id", "1c",
|
||||
"nouniq", "value1",
|
||||
"nouniq", "value1",
|
||||
"nouniq", "value2"));
|
||||
addDoc(adoc("id", "2c",
|
||||
"nouniq", "value1",
|
||||
"nouniq", "value1",
|
||||
"nouniq", "value2",
|
||||
"uniq2", "value1",
|
||||
"uniq2", "value1"));
|
||||
|
||||
assertU(commit());
|
||||
assertQ(req("id:1a"), "count(//*[@name='uniq']/*)=2",
|
||||
"//arr[@name='uniq']/str[1][.='value1']",
|
||||
"//arr[@name='uniq']/str[2][.='value2']");
|
||||
assertQ(req("id:2a"), "count(//*[@name='uniq2']/*)=3",
|
||||
"//arr[@name='uniq2']/str[1][.='value1']",
|
||||
"//arr[@name='uniq2']/str[2][.='value2']",
|
||||
"//arr[@name='uniq2']/str[3][.='value3']");
|
||||
assertQ(req("id:2a"), "count(//*[@name='uniq']/*)=1");
|
||||
assertQ(req("id:1b"), "count(//*[@name='uniq3'])=1");
|
||||
assertQ(req("id:1c"), "count(//*[@name='nouniq']/*)=3");
|
||||
assertQ(req("id:2c"), "count(//*[@name='nouniq']/*)=3");
|
||||
assertQ(req("id:2c"), "count(//*[@name='uniq2']/*)=1");
|
||||
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uniq-fields" });
|
||||
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
|
||||
(SolrParams) mmparams) {
|
||||
};
|
||||
|
||||
XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
|
||||
handler.init(null);
|
||||
ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
|
||||
streams.add(new ContentStreamBase.StringStream(doc));
|
||||
req.setContentStreams(streams);
|
||||
handler.handleRequestBody(req, new SolrQueryResponse());
|
||||
req.close();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue