SOLR-5201 - AnalysisEngines are now created in the factory and passed to the processors with a JCas pool

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tommaso Teofili 2013-09-05 07:12:10 +00:00
parent 32803cb94f
commit e69fb35cc2
3 changed files with 110 additions and 67 deletions

View File

@ -17,49 +17,49 @@ package org.apache.solr.uima.processor;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
/**
* Update document(s) to be indexed with UIMA extracted information
*
*
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class);
private final Logger log = LoggerFactory
.getLogger(UIMAUpdateRequestProcessor.class);
SolrUIMAConfiguration solrUIMAConfiguration;
private AEProvider aeProvider;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, String coreName,
SolrUIMAConfiguration config) {
private AnalysisEngine ae;
private JCasPool pool;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
JCasPool pool) {
super(next);
initialize(coreName, config);
}
private void initialize(String coreName, SolrUIMAConfiguration config) {
this.ae = ae;
this.pool = pool;
solrUIMAConfiguration = config;
aeProvider = AEProviderFactory.getInstance().getAEProvider(coreName,
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
}
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
@ -72,54 +72,66 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
for (String currentText : texts) {
text = currentText;
if (text != null && text.length() > 0) {
/* process the text value */
JCas jcas = processText(text);
/* create a JCas which contain the text to analyze */
JCas jcas = pool.getJCas(0);
try {
/* process the text value */
processText(text, jcas);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
solrInputDocument, jcas);
/* get field mapping from config */
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
.entrySet()) {
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
}
} finally {
pool.releaseJCas(jcas);
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if(logField == null){
SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField();
if(uniqueKeyField != null){
if (logField == null) {
SchemaField uniqueKeyField = cmd.getReq().getSchema()
.getUniqueKeyField();
if (uniqueKeyField != null) {
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "." :
new StringBuilder(". ").append(logField).append("=")
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
.append(", ").toString();
String optionalFieldInfo = logField == null ? "."
: new StringBuilder(". ")
.append(logField)
.append("=")
.append(
(String) cmd.getSolrInputDocument().getField(logField)
.getValue()).append(", ").toString();
int len;
String debugString;
if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100);
debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString();
}
else {
debugString = new StringBuilder(" text=\"")
.append(text.substring(0, len)).append("...\"").toString();
} else {
debugString = " null text";
}
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn("skip the text processing due to {}",new StringBuilder()
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(debugString));
log.warn(
"skip the text processing due to {}",
new StringBuilder().append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString));
} else {
throw new SolrException(ErrorCode.SERVER_ERROR,
new StringBuilder("processing error ")
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(debugString).toString(), e);
throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder(
"processing error ").append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString).toString(), e);
}
}
super.processAdd(cmd);
}
/*
* get the texts to analyze from the corresponding fields
*/
@ -130,30 +142,31 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
if (merge) {
StringBuilder unifiedText = new StringBuilder("");
for (String aFieldsToAnalyze : fieldsToAnalyze) {
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(aFieldsToAnalyze)));
unifiedText.append(String.valueOf(solrInputDocument
.getFieldValue(aFieldsToAnalyze)));
}
textVals = new String[1];
textVals[0] = unifiedText.toString();
} else {
textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) {
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
textVals[i] = String.valueOf(solrInputDocument
.getFieldValue(fieldsToAnalyze[i]));
}
}
return textVals;
}
/* process a field value executing UIMA the CAS containing it as document text */
private JCas processText(String textFieldValue) throws ResourceInitializationException,
AnalysisEngineProcessException {
/*
* process a field value executing UIMA on the JCas containing it as document
* text
*/
private void processText(String textFieldValue, JCas jcas)
throws ResourceInitializationException, AnalysisEngineProcessException {
if (log.isDebugEnabled()) {
log.debug("Analyzing text");
}
/* get the UIMA analysis engine */
AnalysisEngine ae = aeProvider.getAE();
/* create a JCas which contain the text to analyze */
JCas jcas = ae.newJCas();
jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */
@ -161,7 +174,6 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
if (log.isDebugEnabled()) {
log.debug("Text processing completed");
}
return jcas;
}
}

View File

@ -17,20 +17,29 @@ package org.apache.solr.uima.processor;
* limitations under the License.
*/
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
/**
* Factory for {@link UIMAUpdateRequestProcessor}
*
*
*
*/
public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory {
public class UIMAUpdateRequestProcessorFactory extends
UpdateRequestProcessorFactory {
private NamedList<Object> args;
private AnalysisEngine ae;
private JCasPool pool;
@SuppressWarnings("unchecked")
@Override
@ -39,10 +48,26 @@ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFac
}
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp,
UpdateRequestProcessor next) {
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
SolrQueryResponse rsp, UpdateRequestProcessor next) {
SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
.readSolrUIMAConfiguration();
synchronized (this) {
if (ae == null && pool == null) {
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
req.getCore().getName(), configuration.getAePath(),
configuration.getRuntimeParameters());
try {
ae = aeProvider.getAE();
pool = new JCasPool(10, ae);
} catch (ResourceInitializationException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
}
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration());
configuration, ae, pool);
}
}

View File

@ -93,7 +93,6 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
@Test
public void testProcessing() throws Exception {
addDoc("uima", adoc(
"id",
"2312312321312",
@ -185,6 +184,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
}
}
@Test
public void testMultiplierProcessing() throws Exception {
for (int i = 0; i < RANDOM_MULTIPLIER; i++) {
testProcessing();
}
}
private void addDoc(String chain, String doc) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });