SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on invalid config. Specificly: a signatureField that does not exist, or overwriteDupes=true with a signatureField that is not indexed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@944463 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2010-05-14 20:58:29 +00:00
parent 4e93b11311
commit 6fd7b91972
12 changed files with 111 additions and 4 deletions

View File

@ -287,6 +287,11 @@ Bug Fixes
* SOLR-1902: Exposed SolrResourceLoader's class loader for use by Tika
* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on
invalid config. Specificly: a signatureField that does not exist,
or overwriteDupes=true with a signatureField that is not indexed.
(hossman)
Other Changes
----------------------

View File

@ -23,6 +23,8 @@ import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.SolrParams;
@ -34,9 +36,14 @@ import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.plugin.SolrCoreAware;
public class SignatureUpdateProcessorFactory extends
UpdateRequestProcessorFactory {
public class SignatureUpdateProcessorFactory
extends UpdateRequestProcessorFactory
implements SolrCoreAware {
private List<String> sigFields;
private String signatureField;
@ -72,6 +79,23 @@ public class SignatureUpdateProcessorFactory extends
}
}
public void inform(SolrCore core) {
final SchemaField field = core.getSchema().getFieldOrNull(getSignatureField());
if (null == field) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't use signatureField which does not exist in schema: "
+ getSignatureField());
}
if (getOverwriteDupes() && ( ! field.indexed() ) ) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't set overwriteDupes when signatureField is not indexed: "
+ getSignatureField());
}
}
public List<String> getSigFields() {
return sigFields;
}

View File

@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.core.SolrCore;
@ -37,6 +38,9 @@ import org.apache.solr.util.AbstractSolrTestCase;
*/
public class SignatureUpdateProcessorFactoryTest extends AbstractSolrTestCase {
/** modified by tests as needed */
private String processor = "dedupe";
@Override
public String getSchemaFile() {
return "schema12.xml";
@ -47,6 +51,12 @@ public class SignatureUpdateProcessorFactoryTest extends AbstractSolrTestCase {
return "solrconfig.xml";
}
@Override
public void setUp() throws Exception {
super.setUp();
processor = "dedupe"; // set the default that most tests expect
}
public void testDupeDetection() throws Exception {
SolrCore core = h.getCore();
UpdateRequestProcessorChain chained = core.getUpdateProcessingChain(
@ -169,10 +179,45 @@ public class SignatureUpdateProcessorFactoryTest extends AbstractSolrTestCase {
factory.setEnabled(false);
}
/**
* a non-indexed signatureField is fine as long as overwriteDupes==false
*/
public void testNonIndexedSignatureField() throws Exception {
SolrCore core = h.getCore();
assertEquals("docs found when none are expected at start",
0l, core.getSearcher().get().getReader().numDocs());
processor = "stored_sig";
addDoc(adoc("id", "2a", "v_t", "Hello Dude man!", "name", "ali babi'"));
addDoc(adoc("id", "2b", "v_t", "Hello Dude man!", "name", "ali babi'"));
addDoc(commit());
assertEquals("did not find exepcted docs",
2l, core.getSearcher().get().getReader().numDocs());
}
public void testFailNonIndexedSigWithOverwriteDupes() throws Exception {
SolrCore core = h.getCore();
SignatureUpdateProcessorFactory f = new SignatureUpdateProcessorFactory();
NamedList<String> initArgs = new NamedList<String>();
initArgs.add("overwriteDupes", "true");
initArgs.add("signatureField", "signatureField_sS");
f.init(initArgs);
boolean exception_ok = false;
try {
f.inform(core);
} catch (Exception e) {
exception_ok = true;
}
assertTrue("Should have gotten an exception from inform(SolrCore)",
exception_ok);
}
private void addDoc(String doc) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
params.put(UpdateParams.UPDATE_PROCESSOR, new String[] { "dedupe" });
params.put(UpdateParams.UPDATE_PROCESSOR, new String[] { processor });
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(),
(SolrParams) mmparams) {
};

View File

@ -35,12 +35,16 @@
<fieldType name="string" class="solr.StrField"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<field name="fAgain" type="text" indexed="true" stored="true"/>
<field name="fAgain" type="text" indexed="true" stored="true"/>
<dynamicField name="*_twice" type="text" indexed="true" stored="true"/>
<dynamicField name="*_twice" type="text" indexed="true" stored="true"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</fields>
<defaultSearchField>id</defaultSearchField>

View File

@ -394,6 +394,7 @@
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="highlight" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>

View File

@ -17,6 +17,7 @@
-->
<schema name="luceneMatchVersionTest" version="1.1">
<types>
<fieldtype name="string" class="solr.StrField"/>
<fieldtype name="text20" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory" luceneMatchVersion="LUCENE_20"/>
@ -43,9 +44,11 @@
</fieldtype>
</types>
<fields>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<field name="text20" type="text20" indexed="true" stored="false" />
<field name="textDefault" type="textDefault" indexed="true" stored="false" />
<field name="textStandardAnalyzer20" type="textStandardAnalyzer20" indexed="true" stored="false" />
<field name="textStandardAnalyzerDefault" type="textStandardAnalyzerDefault" indexed="true" stored="false" />
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</fields>
</schema>

View File

@ -39,6 +39,8 @@
<fields>
<field name="id" type="sint" indexed="true" stored="true" required="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</fields>
<defaultSearchField>subject</defaultSearchField>

View File

@ -376,6 +376,7 @@
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="sint" indexed="true" stored="true" default="42" multiValued="false"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>

View File

@ -26,6 +26,7 @@
<fieldtype name="integer" class="solr.IntField" />
<fieldtype name="string" class="solr.StrField" />
<fieldtype name="text" class="solr.TextField">
<analyzer>
@ -72,6 +73,9 @@
<field name="two" type="rev" indexed="true" stored="false"/>
<field name="three" type="text" indexed="true" stored="false"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</fields>
<defaultSearchField>one</defaultSearchField>

View File

@ -274,6 +274,7 @@
<!-- for testing, a type that does a transform to see if it's correctly done everywhere -->
<field name="id" type="sfloat" indexed="true" stored="true" required="true" />
<field name="text" type="text" indexed="true" stored="false" />
<field name="signatureField" type="string" indexed="true" stored="false"/>
<field name="tint" type="tint" indexed="true" stored="true" />
<field name="tfloat" type="tfloat" indexed="true" stored="true" />
@ -293,6 +294,7 @@
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>

View File

@ -292,6 +292,8 @@
<field name="point_hash" type="geohash" indexed="true" stored="true" multiValued="false"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
@ -301,6 +303,7 @@
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="*_ii" type="integer" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_is" type="sint" indexed="true" stored="true" multiValued="true"/>

View File

@ -459,5 +459,18 @@
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten
-->
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">non_indexed_signature_sS</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
</config>