mirror of https://github.com/apache/lucene.git
SOLR-2584: add UniqFieldsUpdateProcessor
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1149050 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
857ec84c15
commit
1464513f34
|
@ -313,6 +313,9 @@ New Features
|
||||||
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
|
* LUCENE-2048: Added omitPositions to the schema, so you can omit position
|
||||||
information while still indexing term frequencies. (rmuir)
|
information while still indexing term frequencies. (rmuir)
|
||||||
|
|
||||||
|
* SOLR-2584: add UniqFieldsUpdateProcessor that removes duplicate values in the
|
||||||
|
specified fields. (Elmer Garduno, koji)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
package org.apache.solr.update.processor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.update.AddUpdateCommand;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A non-duplicate processor. Removes duplicates in the specified fields.
|
||||||
|
*
|
||||||
|
* <pre class="prettyprint" >
|
||||||
|
* <updateRequestProcessorChain name="uniq-fields">
|
||||||
|
* <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
|
||||||
|
* <lst name="fields">
|
||||||
|
* <str>uniq</str>
|
||||||
|
* <str>uniq2</str>
|
||||||
|
* <str>uniq3</str>
|
||||||
|
* </lst>
|
||||||
|
* </processor>
|
||||||
|
* <processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
* </updateRequestProcessorChain></pre>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
|
||||||
|
|
||||||
|
private Set<String> fields;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
@Override
|
||||||
|
public void init(@SuppressWarnings("rawtypes") NamedList args) {
|
||||||
|
NamedList<String> flst = (NamedList<String>)args.get("fields");
|
||||||
|
if(flst != null){
|
||||||
|
fields = new HashSet<String>();
|
||||||
|
for(int i = 0; i < flst.size(); i++){
|
||||||
|
fields.add(flst.getVal(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||||
|
SolrQueryResponse rsp,
|
||||||
|
UpdateRequestProcessor next) {
|
||||||
|
return new UniqFieldsUpdateProcessor(next, fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
|
||||||
|
|
||||||
|
private final Set<String> fields;
|
||||||
|
|
||||||
|
public UniqFieldsUpdateProcessor(UpdateRequestProcessor next,
|
||||||
|
Set<String> fields) {
|
||||||
|
super(next);
|
||||||
|
this.fields = fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||||
|
if(fields != null){
|
||||||
|
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||||
|
List<Object> uniqList = new ArrayList<Object>();
|
||||||
|
for (String field : fields) {
|
||||||
|
uniqList.clear();
|
||||||
|
Collection<Object> col = solrInputDocument.getFieldValues(field);
|
||||||
|
if (col != null) {
|
||||||
|
for (Object o : col) {
|
||||||
|
if(!uniqList.contains(o))
|
||||||
|
uniqList.add(o);
|
||||||
|
}
|
||||||
|
solrInputDocument.remove(field);
|
||||||
|
for (Object o : uniqList) {
|
||||||
|
solrInputDocument.addField(field, o);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
super.processAdd(cmd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -523,6 +523,13 @@
|
||||||
<field name="pointD" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
<field name="pointD" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
||||||
<field name="point_hash" type="geohash" indexed="true" stored="true" multiValued="false"/>
|
<field name="point_hash" type="geohash" indexed="true" stored="true" multiValued="false"/>
|
||||||
<field name="store" type="location" indexed="true" stored="true"/>
|
<field name="store" type="location" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<!-- to test uniq fields -->
|
||||||
|
<field name="uniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="uniq2" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="uniq3" type="string" indexed="true" stored="true"/>
|
||||||
|
<field name="nouniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
|
||||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -491,5 +491,15 @@
|
||||||
</processor>
|
</processor>
|
||||||
<processor class="solr.RunUpdateProcessorFactory" />
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
</updateRequestProcessorChain>
|
</updateRequestProcessorChain>
|
||||||
|
<updateRequestProcessorChain name="uniq-fields">
|
||||||
|
<processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
|
||||||
|
<lst name="fields">
|
||||||
|
<str>uniq</str>
|
||||||
|
<str>uniq2</str>
|
||||||
|
<str>uniq3</str>
|
||||||
|
</lst>
|
||||||
|
</processor>
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
|
||||||
</config>
|
</config>
|
||||||
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.update.processor;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.params.UpdateParams;
|
||||||
|
import org.apache.solr.common.util.ContentStream;
|
||||||
|
import org.apache.solr.common.util.ContentStreamBase;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.handler.XmlUpdateRequestHandler;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryRequestBase;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class UniqFieldsUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml", "schema12.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
clearIndex();
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUniqFields() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
UpdateRequestProcessorChain chained = core
|
||||||
|
.getUpdateProcessingChain("uniq-fields");
|
||||||
|
UniqFieldsUpdateProcessorFactory factory = ((UniqFieldsUpdateProcessorFactory) chained
|
||||||
|
.getFactories()[0]);
|
||||||
|
assertNotNull(chained);
|
||||||
|
|
||||||
|
addDoc(adoc("id", "1a",
|
||||||
|
"uniq", "value1",
|
||||||
|
"uniq", "value1",
|
||||||
|
"uniq", "value2"));
|
||||||
|
addDoc(adoc("id", "2a",
|
||||||
|
"uniq2", "value1",
|
||||||
|
"uniq2", "value2",
|
||||||
|
"uniq2", "value1",
|
||||||
|
"uniq2", "value3",
|
||||||
|
"uniq", "value1",
|
||||||
|
"uniq", "value1"));
|
||||||
|
addDoc(adoc("id", "1b",
|
||||||
|
"uniq3", "value1",
|
||||||
|
"uniq3", "value1"));
|
||||||
|
addDoc(adoc("id", "1c",
|
||||||
|
"nouniq", "value1",
|
||||||
|
"nouniq", "value1",
|
||||||
|
"nouniq", "value2"));
|
||||||
|
addDoc(adoc("id", "2c",
|
||||||
|
"nouniq", "value1",
|
||||||
|
"nouniq", "value1",
|
||||||
|
"nouniq", "value2",
|
||||||
|
"uniq2", "value1",
|
||||||
|
"uniq2", "value1"));
|
||||||
|
|
||||||
|
assertU(commit());
|
||||||
|
assertQ(req("id:1a"), "count(//*[@name='uniq']/*)=2",
|
||||||
|
"//arr[@name='uniq']/str[1][.='value1']",
|
||||||
|
"//arr[@name='uniq']/str[2][.='value2']");
|
||||||
|
assertQ(req("id:2a"), "count(//*[@name='uniq2']/*)=3",
|
||||||
|
"//arr[@name='uniq2']/str[1][.='value1']",
|
||||||
|
"//arr[@name='uniq2']/str[2][.='value2']",
|
||||||
|
"//arr[@name='uniq2']/str[3][.='value3']");
|
||||||
|
assertQ(req("id:2a"), "count(//*[@name='uniq']/*)=1");
|
||||||
|
assertQ(req("id:1b"), "count(//*[@name='uniq3'])=1");
|
||||||
|
assertQ(req("id:1c"), "count(//*[@name='nouniq']/*)=3");
|
||||||
|
assertQ(req("id:2c"), "count(//*[@name='nouniq']/*)=3");
|
||||||
|
assertQ(req("id:2c"), "count(//*[@name='uniq2']/*)=1");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addDoc(String doc) throws Exception {
|
||||||
|
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||||
|
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||||
|
params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uniq-fields" });
|
||||||
|
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
|
||||||
|
(SolrParams) mmparams) {
|
||||||
|
};
|
||||||
|
|
||||||
|
XmlUpdateRequestHandler handler = new XmlUpdateRequestHandler();
|
||||||
|
handler.init(null);
|
||||||
|
ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
|
||||||
|
streams.add(new ContentStreamBase.StringStream(doc));
|
||||||
|
req.setContentStreams(streams);
|
||||||
|
handler.handleRequestBody(req, new SolrQueryResponse());
|
||||||
|
req.close();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue