mirror of https://github.com/apache/lucene.git
SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102785 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ed573a067b
commit
0ec6d7a81b
|
@ -33,6 +33,9 @@ New Features
|
|||
|
||||
* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
|
||||
|
||||
* SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.
|
||||
(Tommaso Teofili, koji)
|
||||
|
||||
Test Cases:
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -30,6 +30,13 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
|
|||
<str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
|
||||
</lst>
|
||||
<str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
|
||||
<!-- Set to true if you want to continue indexing even if text processing fails.
|
||||
Default is false. That is, Solr throws RuntimeException and
|
||||
never indexed documents entirely in your session. -->
|
||||
<bool name="ignoreErrors">true</bool>
|
||||
<!-- This is optional. It is used for logging when text processing fails.
|
||||
Usually, set uniqueKey field name -->
|
||||
<str name="logField">id</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
|
|
|
@ -36,14 +36,20 @@ public class SolrUIMAConfiguration {
|
|||
|
||||
private Map<String, Object> runtimeParameters;
|
||||
|
||||
private boolean ignoreErrors;
|
||||
|
||||
private String logField;
|
||||
|
||||
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
|
||||
Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
|
||||
Map<String, Object> runtimeParameters) {
|
||||
Map<String, Object> runtimeParameters, boolean ignoreErrors, String logField) {
|
||||
this.aePath = aePath;
|
||||
this.fieldsToAnalyze = fieldsToAnalyze;
|
||||
this.fieldsMerging = fieldsMerging;
|
||||
this.runtimeParameters = runtimeParameters;
|
||||
this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
|
||||
this.ignoreErrors = ignoreErrors;
|
||||
this.logField = logField;
|
||||
}
|
||||
|
||||
public String[] getFieldsToAnalyze() {
|
||||
|
@ -65,6 +71,14 @@ public class SolrUIMAConfiguration {
|
|||
public Map<String, Object> getRuntimeParameters() {
|
||||
return runtimeParameters;
|
||||
}
|
||||
|
||||
public boolean isIgnoreErrors() {
|
||||
return ignoreErrors;
|
||||
}
|
||||
|
||||
public String getLogField(){
|
||||
return logField;
|
||||
}
|
||||
|
||||
static final class MapField {
|
||||
|
||||
|
|
|
@ -40,7 +40,8 @@ public class SolrUIMAConfigurationReader {
|
|||
|
||||
public SolrUIMAConfiguration readSolrUIMAConfiguration() {
|
||||
return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
|
||||
readTypesFeaturesFieldsMapping(), readAEOverridingParameters());
|
||||
readTypesFeaturesFieldsMapping(), readAEOverridingParameters(), readIgnoreErrors(),
|
||||
readLogField());
|
||||
}
|
||||
|
||||
private String readAEPath() {
|
||||
|
@ -105,4 +106,12 @@ public class SolrUIMAConfigurationReader {
|
|||
return runtimeParameters;
|
||||
}
|
||||
|
||||
private boolean readIgnoreErrors() {
|
||||
Object ignoreErrors = args.get("ignoreErrors");
|
||||
return ignoreErrors == null ? false : (Boolean)ignoreErrors;
|
||||
}
|
||||
|
||||
private String readLogField() {
|
||||
return (String)args.get("logField");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,7 +20,9 @@ package org.apache.solr.uima.processor;
|
|||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.solr.uima.processor.ae.AEProvider;
|
||||
|
@ -58,12 +60,15 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
String text = null;
|
||||
try {
|
||||
/* get Solr document */
|
||||
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
|
||||
|
||||
/* get the fields to analyze */
|
||||
for (String text : getTextsToAnalyze(solrInputDocument)) {
|
||||
String[] texts = getTextsToAnalyze(solrInputDocument);
|
||||
for (int i = 0; i < texts.length; i++) {
|
||||
text = texts[i];
|
||||
if (text != null && !"".equals(text)) {
|
||||
/* process the text value */
|
||||
JCas jcas = processText(text);
|
||||
|
@ -79,7 +84,21 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
}
|
||||
}
|
||||
} catch (UIMAException e) {
|
||||
throw new RuntimeException(e);
|
||||
String logField = solrUIMAConfiguration.getLogField();
|
||||
String optionalFieldInfo = logField == null ? "." :
|
||||
new StringBuilder(". ").append(logField).append("=")
|
||||
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
|
||||
.append(", ").toString();
|
||||
if (solrUIMAConfiguration.isIgnoreErrors())
|
||||
log.warn(new StringBuilder("skip the text processing due to ")
|
||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
||||
.append(" text=\"").append(text.substring(0, 100)).append("...\"").toString());
|
||||
else{
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
new StringBuilder("processing error: ")
|
||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
||||
.append(" text=\"").append(text.substring(0, 100)).append("...\"").toString(), e);
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testProcessing() throws Exception {
|
||||
|
||||
addDoc(adoc(
|
||||
addDoc("uima", adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
|
@ -111,13 +111,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testTwoUpdates() throws Exception {
|
||||
|
||||
addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
|
||||
addDoc("uima", adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
|
||||
+ "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
|
||||
+ "Hemisphere!"));
|
||||
assertU(commit());
|
||||
assertQ(req("sentence:*"), "//*[@numFound='1']");
|
||||
|
||||
addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
|
||||
addDoc("uima", adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
|
||||
+ "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
|
||||
+ " attendee-driven, facilitated by members of the Apache community and will "
|
||||
+ "focus on the Apache..."));
|
||||
|
@ -128,9 +128,41 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
@Test
|
||||
public void testErrorHandling() throws Exception {
|
||||
|
||||
try{
|
||||
addDoc("uima-not-ignoreErrors", adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes. \n "
|
||||
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||
+ " Last Lucene European Conference has been held in Prague."));
|
||||
fail("exception shouldn't be ignored");
|
||||
}
|
||||
catch(RuntimeException expected){}
|
||||
assertU(commit());
|
||||
assertQ(req("*:*"), "//*[@numFound='0']");
|
||||
|
||||
addDoc("uima-ignoreErrors", adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
"text",
|
||||
"SpellCheckComponent got improvement related to recent Lucene changes. \n "
|
||||
+ "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
|
||||
+ "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
|
||||
+ " attached if you need it, but it is also committed to trunk and 3_x branch."
|
||||
+ " Last Lucene European Conference has been held in Prague."));
|
||||
assertU(commit());
|
||||
assertQ(req("*:*"), "//*[@numFound='1']");
|
||||
}
|
||||
|
||||
private void addDoc(String chain, String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uima" });
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
|
||||
};
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package org.apache.solr.uima.processor.an;
|
||||
|
||||
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class DummyExceptionAnnotator extends JCasAnnotator_ImplBase{
|
||||
|
||||
@Override
|
||||
public void process(JCas jcas) throws AnalysisEngineProcessException {
|
||||
throw new AnalysisEngineProcessException();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>true</primitive>
|
||||
<annotatorImplementationName>org.apache.solr.uima.processor.an.DummyExceptionAnnotator</annotatorImplementationName>
|
||||
<analysisEngineMetaData>
|
||||
<name>DummyExceptionAEDescriptor</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor>ASF</vendor>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<typeSystemDescription/>
|
||||
<typePriorities/>
|
||||
<fsIndexCollection/>
|
||||
<capabilities/>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -0,0 +1,54 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
|
||||
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
|
||||
<primitive>false</primitive>
|
||||
<delegateAnalysisEngineSpecifiers>
|
||||
<delegateAnalysisEngine key="DummyExceptionAEDescriptor">
|
||||
<import location="DummyExceptionAEDescriptor.xml"/>
|
||||
</delegateAnalysisEngine>
|
||||
</delegateAnalysisEngineSpecifiers>
|
||||
<analysisEngineMetaData>
|
||||
<name>TestExceptionAE</name>
|
||||
<description/>
|
||||
<version>1.0</version>
|
||||
<vendor/>
|
||||
<configurationParameters/>
|
||||
<configurationParameterSettings/>
|
||||
<flowConstraints>
|
||||
<fixedFlow>
|
||||
<node>DummyExceptionAEDescriptor</node>
|
||||
</fixedFlow>
|
||||
</flowConstraints>
|
||||
<fsIndexCollection/>
|
||||
<capabilities>
|
||||
<capability>
|
||||
<inputs/>
|
||||
<outputs/>
|
||||
<languagesSupported/>
|
||||
</capability>
|
||||
</capabilities>
|
||||
<operationalProperties>
|
||||
<modifiesCas>true</modifiesCas>
|
||||
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
|
||||
<outputsNewCASes>false</outputsNewCASes>
|
||||
</operationalProperties>
|
||||
</analysisEngineMetaData>
|
||||
<resourceManagerConfiguration/>
|
||||
</analysisEngineDescription>
|
|
@ -1003,7 +1003,6 @@
|
|||
</lst>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.LogUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
|
@ -1037,6 +1036,48 @@
|
|||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uima-not-ignoreErrors">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/TestExceptionAE.xml</str>
|
||||
<bool name="ignoreErrors">false</bool>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings"/>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="uima-ignoreErrors">
|
||||
<processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
|
||||
<lst name="uimaConfig">
|
||||
<lst name="runtimeParameters">
|
||||
<int name="ngramsize">3</int>
|
||||
</lst>
|
||||
<str name="analysisEngine">/TestExceptionAE.xml</str>
|
||||
<bool name="ignoreErrors">true</bool>
|
||||
<!-- This is optional. It is used for logging when text processing fails. Usually, set uniqueKey field name -->
|
||||
<str name="logField">id</str>
|
||||
<lst name="analyzeFields">
|
||||
<bool name="merge">false</bool>
|
||||
<arr name="fields">
|
||||
<str>text</str>
|
||||
</arr>
|
||||
</lst>
|
||||
<lst name="fieldMappings"/>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<!--
|
||||
queryResponseWriter plugins... query responses will be written using
|
||||
the writer specified by the 'wt' request parameter matching the name
|
||||
|
|
Loading…
Reference in New Issue