SOLR-5201 - AnalysisEngines are now created in the factory and passed to the processors with a JCas pool

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tommaso Teofili 2013-09-05 07:12:10 +00:00
parent 32803cb94f
commit e69fb35cc2
3 changed files with 110 additions and 67 deletions

View File

@ -17,49 +17,49 @@ package org.apache.solr.uima.processor;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField; import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
/** /**
* Update document(s) to be indexed with UIMA extracted information * Update document(s) to be indexed with UIMA extracted information
* *
*/ */
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class); private final Logger log = LoggerFactory
.getLogger(UIMAUpdateRequestProcessor.class);
SolrUIMAConfiguration solrUIMAConfiguration; SolrUIMAConfiguration solrUIMAConfiguration;
private AEProvider aeProvider; private AnalysisEngine ae;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, String coreName, private JCasPool pool;
SolrUIMAConfiguration config) {
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
JCasPool pool) {
super(next); super(next);
initialize(coreName, config); this.ae = ae;
} this.pool = pool;
private void initialize(String coreName, SolrUIMAConfiguration config) {
solrUIMAConfiguration = config; solrUIMAConfiguration = config;
aeProvider = AEProviderFactory.getInstance().getAEProvider(coreName,
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
} }
@Override @Override
public void processAdd(AddUpdateCommand cmd) throws IOException { public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null; String text = null;
@ -72,54 +72,66 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
for (String currentText : texts) { for (String currentText : texts) {
text = currentText; text = currentText;
if (text != null && text.length() > 0) { if (text != null && text.length() > 0) {
/* process the text value */ /* create a JCas which contain the text to analyze */
JCas jcas = processText(text); JCas jcas = pool.getJCas(0);
try {
/* process the text value */
processText(text, jcas);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas); UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
/* get field mapping from config */ solrInputDocument, jcas);
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration /* get field mapping from config */
.getTypesFeaturesFieldsMapping(); Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
/* map type features on fields */ .getTypesFeaturesFieldsMapping();
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) { /* map type features on fields */
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN)); for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
.entrySet()) {
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
}
} finally {
pool.releaseJCas(jcas);
} }
} }
} }
} catch (Exception e) { } catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField(); String logField = solrUIMAConfiguration.getLogField();
if(logField == null){ if (logField == null) {
SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField(); SchemaField uniqueKeyField = cmd.getReq().getSchema()
if(uniqueKeyField != null){ .getUniqueKeyField();
if (uniqueKeyField != null) {
logField = uniqueKeyField.getName(); logField = uniqueKeyField.getName();
} }
} }
String optionalFieldInfo = logField == null ? "." : String optionalFieldInfo = logField == null ? "."
new StringBuilder(". ").append(logField).append("=") : new StringBuilder(". ")
.append((String)cmd.getSolrInputDocument().getField(logField).getValue()) .append(logField)
.append(", ").toString(); .append("=")
.append(
(String) cmd.getSolrInputDocument().getField(logField)
.getValue()).append(", ").toString();
int len; int len;
String debugString; String debugString;
if (text != null && text.length() > 0) { if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100); len = Math.min(text.length(), 100);
debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString(); debugString = new StringBuilder(" text=\"")
} .append(text.substring(0, len)).append("...\"").toString();
else { } else {
debugString = " null text"; debugString = " null text";
} }
if (solrUIMAConfiguration.isIgnoreErrors()) { if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn("skip the text processing due to {}",new StringBuilder() log.warn(
.append(e.getLocalizedMessage()).append(optionalFieldInfo) "skip the text processing due to {}",
.append(debugString)); new StringBuilder().append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString));
} else { } else {
throw new SolrException(ErrorCode.SERVER_ERROR, throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder(
new StringBuilder("processing error ") "processing error ").append(e.getLocalizedMessage())
.append(e.getLocalizedMessage()).append(optionalFieldInfo) .append(optionalFieldInfo).append(debugString).toString(), e);
.append(debugString).toString(), e);
} }
} }
super.processAdd(cmd); super.processAdd(cmd);
} }
/* /*
* get the texts to analyze from the corresponding fields * get the texts to analyze from the corresponding fields
*/ */
@ -130,30 +142,31 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
if (merge) { if (merge) {
StringBuilder unifiedText = new StringBuilder(""); StringBuilder unifiedText = new StringBuilder("");
for (String aFieldsToAnalyze : fieldsToAnalyze) { for (String aFieldsToAnalyze : fieldsToAnalyze) {
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(aFieldsToAnalyze))); unifiedText.append(String.valueOf(solrInputDocument
.getFieldValue(aFieldsToAnalyze)));
} }
textVals = new String[1]; textVals = new String[1];
textVals[0] = unifiedText.toString(); textVals[0] = unifiedText.toString();
} else { } else {
textVals = new String[fieldsToAnalyze.length]; textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) { for (int i = 0; i < fieldsToAnalyze.length; i++) {
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])); textVals[i] = String.valueOf(solrInputDocument
.getFieldValue(fieldsToAnalyze[i]));
} }
} }
return textVals; return textVals;
} }
/* process a field value executing UIMA the CAS containing it as document text */ /*
private JCas processText(String textFieldValue) throws ResourceInitializationException, * process a field value executing UIMA on the JCas containing it as document
AnalysisEngineProcessException { * text
*/
private void processText(String textFieldValue, JCas jcas)
throws ResourceInitializationException, AnalysisEngineProcessException {
if (log.isDebugEnabled()) { if (log.isDebugEnabled()) {
log.debug("Analyzing text"); log.debug("Analyzing text");
} }
/* get the UIMA analysis engine */
AnalysisEngine ae = aeProvider.getAE();
/* create a JCas which contain the text to analyze */
JCas jcas = ae.newJCas();
jcas.setDocumentText(textFieldValue); jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */ /* perform analysis on text field */
@ -161,7 +174,6 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
if (log.isDebugEnabled()) { if (log.isDebugEnabled()) {
log.debug("Text processing completed"); log.debug("Text processing completed");
} }
return jcas;
} }
} }

View File

@ -17,20 +17,29 @@ package org.apache.solr.uima.processor;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory; import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
/** /**
* Factory for {@link UIMAUpdateRequestProcessor} * Factory for {@link UIMAUpdateRequestProcessor}
* *
* *
*/ */
public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { public class UIMAUpdateRequestProcessorFactory extends
UpdateRequestProcessorFactory {
private NamedList<Object> args; private NamedList<Object> args;
private AnalysisEngine ae;
private JCasPool pool;
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Override @Override
@ -39,10 +48,26 @@ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFac
} }
/*
 * Creates a UIMAUpdateRequestProcessor for the given request.
 *
 * The Solr UIMA configuration is read from the factory args on every call.
 * The AnalysisEngine and the JCasPool are stored in factory fields and are
 * created inside the synchronized block below only while both fields are
 * still null; the same two objects are then handed to each processor
 * returned by this method.
 *
 * A ResourceInitializationException raised while obtaining the engine or
 * building the pool is rethrown as a SolrException with SERVER_ERROR.
 */
@Override @Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, public UpdateRequestProcessor getInstance(SolrQueryRequest req,
UpdateRequestProcessor next) { SolrQueryResponse rsp, UpdateRequestProcessor next) {
SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
.readSolrUIMAConfiguration();
/* guard the one-time creation of the shared engine and pool */
synchronized (this) {
/*
 * NOTE(review): the guard requires BOTH fields to be null. ae is assigned
 * before the JCasPool is constructed, so if the JCasPool constructor throws,
 * ae stays non-null while pool stays null; later calls would then skip this
 * branch and pass a null pool to the processor. Consider testing
 * (ae == null || pool == null) or assigning both fields only after both
 * objects exist - confirm intended behaviour.
 */
if (ae == null && pool == null) {
/* the provider is looked up by core name, AE descriptor path and runtime parameters */
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
req.getCore().getName(), configuration.getAePath(),
configuration.getRuntimeParameters());
try {
ae = aeProvider.getAE();
/* presumably 10 is the number of JCas instances kept in the pool - verify against the JCasPool API */
pool = new JCasPool(10, ae);
} catch (ResourceInitializationException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
}
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(), return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration()); configuration, ae, pool);
} }
} }

View File

@ -93,7 +93,6 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test @Test
public void testProcessing() throws Exception { public void testProcessing() throws Exception {
addDoc("uima", adoc( addDoc("uima", adoc(
"id", "id",
"2312312321312", "2312312321312",
@ -185,6 +184,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
} }
} }
/*
 * Runs testProcessing() once per loop iteration, RANDOM_MULTIPLIER times in
 * total. RANDOM_MULTIPLIER is not defined in the visible code (presumably
 * inherited from the test base class - confirm).
 * NOTE(review): looks intended to exercise repeated document adds through the
 * same "uima" update chain - confirm.
 */
@Test
public void testMultiplierProcessing() throws Exception {
for (int i = 0; i < RANDOM_MULTIPLIER; i++) {
testProcessing();
}
}
private void addDoc(String chain, String doc) throws Exception { private void addDoc(String chain, String doc) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>(); Map<String, String[]> params = new HashMap<String, String[]>();
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain }); params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });