mirror of https://github.com/apache/lucene.git
SOLR-4249: UniqFieldsUpdateProcessorFactory now extends FieldMutatingUpdateProcessorFactory and supports all of it's selector options
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1518717 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d339f4278c
commit
13bedbe616
|
@ -71,6 +71,11 @@ Upgrading from Solr 4.4.0
|
|||
XXXXX and delete the ones that you do not wish to use. See SOLR-4953 &
|
||||
SOLR-5108 for more details.
|
||||
|
||||
* The UniqFieldsUpdateProcessorFactory has been improved to support all of the
|
||||
FieldMutatingUpdateProcessorFactory selector options. The <lst named="fields">
|
||||
init param option is now deprecated and should be replaced with the more standard
|
||||
<arr name="fieldName">. See SOLR-4249 for more details.
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
|
@ -103,6 +108,11 @@ New Features
|
|||
* SOLR-5182: Add NoOpRegenerator, a regenerator for custom per-segment caches
|
||||
where items are preserved across commits. (Robert Muir)
|
||||
|
||||
* SOLR-4249: UniqFieldsUpdateProcessorFactory now extends
|
||||
FieldMutatingUpdateProcessorFactory and supports all of it's selector options. Use
|
||||
of the "fields" init param is now deprecated in favor of "fieldName" (hossman)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -23,6 +23,9 @@ import java.util.Collection;
|
|||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -30,77 +33,68 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
|
||||
/**
|
||||
* A non-duplicate processor. Removes duplicates in the specified fields.
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <updateRequestProcessorChain name="uniq-fields">
|
||||
* <processor class="org.apache.solr.update.processor.UniqFieldsUpdateProcessorFactory">
|
||||
* <lst name="fields">
|
||||
* <str>uniq</str>
|
||||
* <str>uniq2</str>
|
||||
* <str>uniq3</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* <processor class="solr.RunUpdateProcessorFactory" />
|
||||
* </updateRequestProcessorChain></pre>
|
||||
*
|
||||
*/
|
||||
public class UniqFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory {
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
private Set<String> fields;
|
||||
/**
|
||||
* Removes duplicate values found in fields matching the specified conditions.
|
||||
* The existing field values are iterated in order, and values are removed when
|
||||
* they are equal to a value that has already been seen for this field.
|
||||
* <p>
|
||||
* By default this processor matches no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* In the example configuration below, if a document initially contains the values
|
||||
* <code>"Steve","Lucy","Jim",Steve","Alice","Bob","Alice"</code> in a field named
|
||||
* <code>foo_uniq</code> then using this processor will result in the final list of
|
||||
* field values being <code>"Steve","Lucy","Jim","Alice","Bob"</code>
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.UniqFieldsUpdateProcessorFactory">
|
||||
* <str name="fieldRegex">.*_uniq</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*/
|
||||
public class UniqFieldsUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
|
||||
|
||||
public final static Logger log = LoggerFactory.getLogger(UniqFieldsUpdateProcessorFactory.class);
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(@SuppressWarnings("rawtypes") NamedList args) {
|
||||
NamedList<String> flst = (NamedList<String>)args.get("fields");
|
||||
// legacy init param support, will be removed in 5.0
|
||||
// no idea why this was ever implimented as <lst> should have just been <arr>
|
||||
NamedList<String> flst = (NamedList<String>) args.remove("fields");
|
||||
if(flst != null){
|
||||
fields = new HashSet<String>();
|
||||
for(int i = 0; i < flst.size(); i++){
|
||||
fields.add(flst.getVal(i));
|
||||
log.warn("Use of the 'fields' init param in UniqFieldsUpdateProcessorFactory is deprecated, please use 'fieldName' (or another FieldMutatingUpdateProcessorFactory selector option) instead");
|
||||
log.info("Replacing 'fields' init param with (individual) 'fieldName' params");
|
||||
for (Map.Entry<String,String> entry : flst) {
|
||||
args.add("fieldName", entry.getValue());
|
||||
}
|
||||
}
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new UniqFieldsUpdateProcessor(next, fields);
|
||||
}
|
||||
|
||||
public class UniqFieldsUpdateProcessor extends UpdateRequestProcessor {
|
||||
|
||||
private final Set<String> fields;
|
||||
|
||||
public UniqFieldsUpdateProcessor(UpdateRequestProcessor next,
|
||||
Set<String> fields) {
|
||||
super(next);
|
||||
this.fields = fields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
if(fields != null){
|
||||
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||
List<Object> uniqList = new ArrayList<Object>();
|
||||
for (String field : fields) {
|
||||
uniqList.clear();
|
||||
Collection<Object> col = solrInputDocument.getFieldValues(field);
|
||||
if (col != null) {
|
||||
for (Object o : col) {
|
||||
if(!uniqList.contains(o))
|
||||
uniqList.add(o);
|
||||
}
|
||||
solrInputDocument.remove(field);
|
||||
for (Object o : uniqList) {
|
||||
solrInputDocument.addField(field, o);
|
||||
}
|
||||
}
|
||||
}
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection pickSubset(Collection values) {
|
||||
Set<Object> uniqs = new HashSet<Object>();
|
||||
List<Object> result = new ArrayList<Object>(values.size());
|
||||
for (Object o : values) {
|
||||
if (!uniqs.contains(o)) {
|
||||
uniqs.add(o);
|
||||
result.add(o);
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -429,6 +429,12 @@
|
|||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uniq-values">
|
||||
<processor class="solr.UniqFieldsUpdateProcessorFactory">
|
||||
<str name="fieldRegex">uniq_.*</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="pre-analyzed-simple">
|
||||
<processor class="solr.PreAnalyzedUpdateProcessorFactory">
|
||||
<str name="fieldName">subject</str>
|
||||
|
|
|
@ -94,6 +94,28 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase {
|
|||
5.0F, d.getField("foo_s").getBoost(), 0.0F);
|
||||
}
|
||||
|
||||
public void testUniqValues() throws Exception {
|
||||
final String chain = "uniq-values";
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd(chain,
|
||||
doc(f("id", "1111"),
|
||||
f("name", "Hoss", "Man", "Hoss"),
|
||||
f("uniq_1_s", "Hoss", "Man", "Hoss"),
|
||||
f("uniq_2_s", "Foo", "Hoss", "Man", "Hoss", "Bar"),
|
||||
f("uniq_3_s", 5.0F, 23, "string", 5.0F)));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("Hoss", "Man", "Hoss"),
|
||||
d.getFieldValues("name"));
|
||||
assertEquals(Arrays.asList("Hoss","Man"),
|
||||
d.getFieldValues("uniq_1_s"));
|
||||
assertEquals(Arrays.asList("Foo","Hoss","Man","Bar"),
|
||||
d.getFieldValues("uniq_2_s"));
|
||||
assertEquals(Arrays.asList(5.0F, 23, "string"),
|
||||
d.getFieldValues("uniq_3_s"));
|
||||
}
|
||||
|
||||
public void testTrimFields() throws Exception {
|
||||
for (String chain : Arrays.asList("trim-fields", "trim-fields-arr")) {
|
||||
SolrInputDocument d = null;
|
||||
|
|
Loading…
Reference in New Issue