mirror of https://github.com/apache/lucene.git
SOLR-3200 and SOLR-3226: fixing SignatureUpdateProcessorFactory to include non-String values, and to deal with the 'all fields' case correctly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1308604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bf582b23b0
commit
ae0a411041
|
@ -508,6 +508,13 @@ Upgrading from Solr 3.5
|
|||
* SOLR-3161: Don't use the 'qt' parameter with a leading '/'. It probably won't work in 4.0
|
||||
and it's now limited in 3.6 to SearchHandler subclasses that aren't lazy-loaded.
|
||||
|
||||
* Bugs found and fixed in the SignatureUpdateProcessor that previously caused
|
||||
some documents to produce the same signature even when the configured fields
|
||||
contained distinct (non-String) values. Users of SignatureUpdateProcessor
|
||||
are strongly advised that they should re-index as document signatures may
|
||||
have now changed. (see SOLR-3200 & SOLR-3226 for details)
|
||||
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
* SOLR-2020: Add Java client that uses Apache Http Components http client (4.x).
|
||||
|
@ -756,6 +763,13 @@ Bug Fixes
|
|||
* SOLR-3261: Fix edismax to respect query operators when literal colons
|
||||
are used in query string. (Juan Grande via hossman)
|
||||
|
||||
* SOLR-3226: Fix SignatureUpdateProcessor to no longer ignore non-String
|
||||
field values (Spyros Kapnissis, hossman)
|
||||
|
||||
* SOLR-3200: Fix SignatureUpdateProcessor "all fields" mode to use all
|
||||
fields of each document instead of hte fields specified by the first
|
||||
document indexed (Spyros Kapnissis via hossman)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
* SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji)
|
||||
|
|
|
@ -132,29 +132,30 @@ public class SignatureUpdateProcessorFactory
|
|||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
if (enabled) {
|
||||
SolrInputDocument doc = cmd.getSolrInputDocument();
|
||||
List<String> currDocSigFields = null;
|
||||
if (sigFields == null || sigFields.size() == 0) {
|
||||
Collection<String> docFields = doc.getFieldNames();
|
||||
sigFields = new ArrayList<String>(docFields.size());
|
||||
sigFields.addAll(docFields);
|
||||
Collections.sort(sigFields);
|
||||
currDocSigFields = new ArrayList<String>(docFields.size());
|
||||
currDocSigFields.addAll(docFields);
|
||||
Collections.sort(currDocSigFields);
|
||||
} else {
|
||||
currDocSigFields = sigFields;
|
||||
}
|
||||
|
||||
Signature sig = (Signature) req.getCore().getResourceLoader().newInstance(signatureClass);
|
||||
sig.init(params);
|
||||
|
||||
for (String field : sigFields) {
|
||||
for (String field : currDocSigFields) {
|
||||
SolrInputField f = doc.getField(field);
|
||||
if (f != null) {
|
||||
sig.add(field);
|
||||
Object o = f.getValue();
|
||||
if (o instanceof String) {
|
||||
sig.add((String)o);
|
||||
} else if (o instanceof Collection) {
|
||||
if (o instanceof Collection) {
|
||||
for (Object oo : (Collection)o) {
|
||||
if (oo instanceof String) {
|
||||
sig.add((String)oo);
|
||||
}
|
||||
sig.add(String.valueOf(oo));
|
||||
}
|
||||
} else {
|
||||
sig.add(String.valueOf(o));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
</fieldtype>
|
||||
</types>
|
||||
<fields>
|
||||
<field name="id" type="string" indexed="true" stored="true"/>
|
||||
<field name="signatureField" type="string" indexed="true" stored="false"/>
|
||||
<field name="text30" type="text30" indexed="true" stored="false" />
|
||||
<field name="textDefault" type="textDefault" indexed="true" stored="false" />
|
||||
|
@ -51,4 +52,5 @@
|
|||
<field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
|
||||
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||
</fields>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
</schema>
|
||||
|
|
|
@ -530,6 +530,16 @@
|
|||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="dedupe-allfields">
|
||||
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
|
||||
<bool name="enabled">false</bool>
|
||||
<bool name="overwriteDupes">false</bool>
|
||||
<str name="signatureField">id</str>
|
||||
<str name="fields"></str>
|
||||
<str name="signatureClass">org.apache.solr.update.processor.Lookup3Signature</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="stored_sig">
|
||||
<!-- this chain is valid even though the signature field is not
|
||||
indexed, because we are not asking for dups to be overwritten
|
||||
|
|
|
@ -22,6 +22,9 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
|
@ -29,9 +32,11 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.BinaryUpdateRequestHandler;
|
||||
import org.apache.solr.handler.XmlUpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -67,6 +72,29 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
req.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDupeAllFieldsDetection() throws Exception {
|
||||
|
||||
this.chain = "dedupe-allfields";
|
||||
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(this.chain);
|
||||
SignatureUpdateProcessorFactory factory = ((SignatureUpdateProcessorFactory) chained
|
||||
.getFactories()[0]);
|
||||
factory.setEnabled(true);
|
||||
assertNotNull(chained);
|
||||
|
||||
addDoc(adoc("v_t", "Hello Dude man!"));
|
||||
addDoc(adoc("v_t", "Hello Dude man!", "name", "name1'"));
|
||||
addDoc(adoc("v_t", "Hello Dude man!", "name", "name2'"));
|
||||
|
||||
addDoc(commit());
|
||||
|
||||
checkNumDocs(3);
|
||||
|
||||
factory.setEnabled(false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDupeDetection() throws Exception {
|
||||
|
@ -228,6 +256,100 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
assertTrue("Should have gotten an exception from inform(SolrCore)",
|
||||
exception_ok);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonStringFieldsValues() throws Exception {
|
||||
this.chain = "dedupe-allfields";
|
||||
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain chained = core
|
||||
.getUpdateProcessingChain(chain);
|
||||
SignatureUpdateProcessorFactory factory = ((SignatureUpdateProcessorFactory) chained
|
||||
.getFactories()[0]);
|
||||
factory.setEnabled(true);
|
||||
|
||||
Map<String,String[]> params = new HashMap<String,String[]>();
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] {chain});
|
||||
|
||||
UpdateRequest ureq = new UpdateRequest();
|
||||
|
||||
{
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("v_t", "same");
|
||||
doc.addField("weight", 1.0f);
|
||||
doc.addField("ints_is", 34);
|
||||
doc.addField("ints_is", 42);
|
||||
ureq.add(doc);
|
||||
}
|
||||
{
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("v_t", "same");
|
||||
doc.addField("weight", 2.0f);
|
||||
doc.addField("ints_is", 42);
|
||||
doc.addField("ints_is", 66);
|
||||
ureq.add(doc);
|
||||
}
|
||||
{
|
||||
// A and B should have same sig as eachother
|
||||
// even though the particulars of how the the ints_is list are built
|
||||
|
||||
SolrInputDocument docA = new SolrInputDocument();
|
||||
SolrInputDocument docB = new SolrInputDocument();
|
||||
|
||||
UnusualList<Integer> ints = new UnusualList<Integer>(3);
|
||||
for (int val : new int[] {42, 66, 34}) {
|
||||
docA.addField("ints_is", new Integer(val));
|
||||
ints.add(val);
|
||||
}
|
||||
docB.addField("ints_is", ints);
|
||||
|
||||
for (SolrInputDocument doc : new SolrInputDocument[] { docA, docB }) {
|
||||
doc.addField("v_t", "same");
|
||||
doc.addField("weight", 3.0f);
|
||||
ureq.add(doc);
|
||||
}
|
||||
}
|
||||
{
|
||||
// now add another doc with the same values as A & B above,
|
||||
// but diff ints_is collection (diff order)
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("v_t", "same");
|
||||
doc.addField("weight", 3.0f);
|
||||
for (int val : new int[] {66, 42, 34}) {
|
||||
doc.addField("ints_is", new Integer(val));
|
||||
}
|
||||
ureq.add(doc);
|
||||
}
|
||||
|
||||
|
||||
ArrayList<ContentStream> streams = new ArrayList<ContentStream>(2);
|
||||
streams.add(new BinaryRequestWriter().getContentStream(ureq));
|
||||
LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), mmparams);
|
||||
try {
|
||||
req.setContentStreams(streams);
|
||||
BinaryUpdateRequestHandler h = new BinaryUpdateRequestHandler();
|
||||
h.handleRequestBody(req, new SolrQueryResponse());
|
||||
} finally {
|
||||
req.close();
|
||||
}
|
||||
|
||||
addDoc(commit());
|
||||
|
||||
checkNumDocs(4);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/** A list with an unusual toString */
|
||||
private static final class UnusualList<T> extends ArrayList<T> {
|
||||
public UnusualList(int size) {
|
||||
super(size);
|
||||
}
|
||||
public String toString() {
|
||||
return "UNUSUAL:" + super.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||
|
|
Loading…
Reference in New Issue