SOLR-3200 and SOLR-3226: fixing SignatureUpdateProcessorFactory to include non-String values, and to deal with the 'all fields' case correctly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1308604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2012-04-02 22:37:12 +00:00
parent bf582b23b0
commit ae0a411041
5 changed files with 159 additions and 10 deletions

View File

@ -508,6 +508,13 @@ Upgrading from Solr 3.5
* SOLR-3161: Don't use the 'qt' parameter with a leading '/'. It probably won't work in 4.0
and it's now limited in 3.6 to SearchHandler subclasses that aren't lazy-loaded.
* Bugs found and fixed in the SignatureUpdateProcessor that previously caused
some documents to produce the same signature even when the configured fields
contained distinct (non-String) values. Users of SignatureUpdateProcessor
are strongly advised that they should re-index as document signatures may
have now changed. (see SOLR-3200 & SOLR-3226 for details)
New Features
----------------------
* SOLR-2020: Add Java client that uses Apache Http Components http client (4.x).
@ -756,6 +763,13 @@ Bug Fixes
* SOLR-3261: Fix edismax to respect query operators when literal colons
are used in query string. (Juan Grande via hossman)
* SOLR-3226: Fix SignatureUpdateProcessor to no longer ignore non-String
field values (Spyros Kapnissis, hossman)
* SOLR-3200: Fix SignatureUpdateProcessor "all fields" mode to use all
fields of each document instead of hte fields specified by the first
document indexed (Spyros Kapnissis via hossman)
Other Changes
----------------------
* SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji)

View File

@ -132,29 +132,30 @@ public class SignatureUpdateProcessorFactory
public void processAdd(AddUpdateCommand cmd) throws IOException {
if (enabled) {
SolrInputDocument doc = cmd.getSolrInputDocument();
List<String> currDocSigFields = null;
if (sigFields == null || sigFields.size() == 0) {
Collection<String> docFields = doc.getFieldNames();
sigFields = new ArrayList<String>(docFields.size());
sigFields.addAll(docFields);
Collections.sort(sigFields);
currDocSigFields = new ArrayList<String>(docFields.size());
currDocSigFields.addAll(docFields);
Collections.sort(currDocSigFields);
} else {
currDocSigFields = sigFields;
}
Signature sig = (Signature) req.getCore().getResourceLoader().newInstance(signatureClass);
sig.init(params);
for (String field : sigFields) {
for (String field : currDocSigFields) {
SolrInputField f = doc.getField(field);
if (f != null) {
sig.add(field);
Object o = f.getValue();
if (o instanceof String) {
sig.add((String)o);
} else if (o instanceof Collection) {
if (o instanceof Collection) {
for (Object oo : (Collection)o) {
if (oo instanceof String) {
sig.add((String)oo);
}
sig.add(String.valueOf(oo));
}
} else {
sig.add(String.valueOf(o));
}
}
}

View File

@ -44,6 +44,7 @@
</fieldtype>
</types>
<fields>
<field name="id" type="string" indexed="true" stored="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<field name="text30" type="text30" indexed="true" stored="false" />
<field name="textDefault" type="textDefault" indexed="true" stored="false" />
@ -51,4 +52,5 @@
<field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</fields>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -530,6 +530,16 @@
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="dedupe-allfields">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">false</bool>
<bool name="overwriteDupes">false</bool>
<str name="signatureField">id</str>
<str name="fields"></str>
<str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten

View File

@ -22,6 +22,9 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
@ -29,9 +32,11 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.BinaryUpdateRequestHandler;
import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.junit.Before;
import org.junit.BeforeClass;
@ -67,6 +72,29 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
req.close();
}
}
@Test
public void testDupeAllFieldsDetection() throws Exception {
this.chain = "dedupe-allfields";
SolrCore core = h.getCore();
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(this.chain);
SignatureUpdateProcessorFactory factory = ((SignatureUpdateProcessorFactory) chained
.getFactories()[0]);
factory.setEnabled(true);
assertNotNull(chained);
addDoc(adoc("v_t", "Hello Dude man!"));
addDoc(adoc("v_t", "Hello Dude man!", "name", "name1'"));
addDoc(adoc("v_t", "Hello Dude man!", "name", "name2'"));
addDoc(commit());
checkNumDocs(3);
factory.setEnabled(false);
}
@Test
public void testDupeDetection() throws Exception {
@ -228,6 +256,100 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
assertTrue("Should have gotten an exception from inform(SolrCore)",
exception_ok);
}
@Test
public void testNonStringFieldsValues() throws Exception {
this.chain = "dedupe-allfields";
SolrCore core = h.getCore();
UpdateRequestProcessorChain chained = core
.getUpdateProcessingChain(chain);
SignatureUpdateProcessorFactory factory = ((SignatureUpdateProcessorFactory) chained
.getFactories()[0]);
factory.setEnabled(true);
Map<String,String[]> params = new HashMap<String,String[]>();
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
params.put(UpdateParams.UPDATE_CHAIN, new String[] {chain});
UpdateRequest ureq = new UpdateRequest();
{
SolrInputDocument doc = new SolrInputDocument();
doc.addField("v_t", "same");
doc.addField("weight", 1.0f);
doc.addField("ints_is", 34);
doc.addField("ints_is", 42);
ureq.add(doc);
}
{
SolrInputDocument doc = new SolrInputDocument();
doc.addField("v_t", "same");
doc.addField("weight", 2.0f);
doc.addField("ints_is", 42);
doc.addField("ints_is", 66);
ureq.add(doc);
}
{
// A and B should have same sig as eachother
// even though the particulars of how the the ints_is list are built
SolrInputDocument docA = new SolrInputDocument();
SolrInputDocument docB = new SolrInputDocument();
UnusualList<Integer> ints = new UnusualList<Integer>(3);
for (int val : new int[] {42, 66, 34}) {
docA.addField("ints_is", new Integer(val));
ints.add(val);
}
docB.addField("ints_is", ints);
for (SolrInputDocument doc : new SolrInputDocument[] { docA, docB }) {
doc.addField("v_t", "same");
doc.addField("weight", 3.0f);
ureq.add(doc);
}
}
{
// now add another doc with the same values as A & B above,
// but diff ints_is collection (diff order)
SolrInputDocument doc = new SolrInputDocument();
doc.addField("v_t", "same");
doc.addField("weight", 3.0f);
for (int val : new int[] {66, 42, 34}) {
doc.addField("ints_is", new Integer(val));
}
ureq.add(doc);
}
ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
streams.add(new BinaryRequestWriter().getContentStream(ureq));
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), mmparams);
try {
req.setContentStreams(streams);
BinaryUpdateRequestHandler h = new BinaryUpdateRequestHandler();
h.handleRequestBody(req, new SolrQueryResponse());
} finally {
req.close();
}
addDoc(commit());
checkNumDocs(4);
}
/** A list with an unusual toString */
private static final class UnusualList<T> extends ArrayList<T> {
public UnusualList(int size) {
super(size);
}
public String toString() {
return "UNUSUAL:" + super.toString();
}
}
private void addDoc(String doc) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();