mirror of https://github.com/apache/lucene.git
SOLR-5201 - AnalysisEngines are now created in the factory and passed to the processors with a JCas pool
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
32803cb94f
commit
e69fb35cc2
|
@ -17,49 +17,49 @@ package org.apache.solr.uima.processor;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProvider;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||
import org.apache.uima.jcas.JCas;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.JCasPool;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Update document(s) to be indexed with UIMA extracted information
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||
|
||||
private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class);
|
||||
|
||||
|
||||
private final Logger log = LoggerFactory
|
||||
.getLogger(UIMAUpdateRequestProcessor.class);
|
||||
|
||||
SolrUIMAConfiguration solrUIMAConfiguration;
|
||||
|
||||
private AEProvider aeProvider;
|
||||
|
||||
/*
 * Constructor region of UIMAUpdateRequestProcessor.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so two
 * constructor signatures, the initialize(...) helper and two field
 * declarations are interleaved. Presumably the three-argument constructor and
 * initialize(...) (which looks up an AEProvider through AEProviderFactory) are
 * the removed form, and the five-argument constructor that stores the given
 * AnalysisEngine and JCasPool in the ae / pool fields is the added form --
 * confirm against the commit.
 *
 * Visible statements: super(next) passes the next processor to the parent
 * class; this.ae / this.pool keep the supplied engine and pool;
 * solrUIMAConfiguration keeps the supplied configuration.
 *
 * NOTE(review): in the five-argument form, coreName is not referenced by any
 * of the assignments visible here -- verify whether the parameter is still
 * needed.
 */
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, String coreName,
|
||||
SolrUIMAConfiguration config) {
|
||||

||||
private AnalysisEngine ae;
|
||||

||||
private JCasPool pool;
|
||||

||||
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
|
||||
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
|
||||
JCasPool pool) {
|
||||
super(next);
|
||||
initialize(coreName, config);
|
||||
}
|
||||

||||
private void initialize(String coreName, SolrUIMAConfiguration config) {
|
||||
this.ae = ae;
|
||||
this.pool = pool;
|
||||
solrUIMAConfiguration = config;
|
||||
aeProvider = AEProviderFactory.getInstance().getAEProvider(coreName,
|
||||
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
String text = null;
|
||||
|
@ -72,54 +72,66 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
for (String currentText : texts) {
|
||||
text = currentText;
|
||||
if (text != null && text.length() > 0) {
|
||||
/* process the text value */
|
||||
JCas jcas = processText(text);
|
||||
/* get a JCas from the pool to hold the text to analyze */
|
||||
JCas jcas = pool.getJCas(0);
|
||||
try {
|
||||
/* process the text value */
|
||||
processText(text, jcas);
|
||||
|
||||
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
|
||||
/* get field mapping from config */
|
||||
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||
.getTypesFeaturesFieldsMapping();
|
||||
/* map type features on fields */
|
||||
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
|
||||
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
|
||||
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
|
||||
solrInputDocument, jcas);
|
||||
/* get field mapping from config */
|
||||
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||
.getTypesFeaturesFieldsMapping();
|
||||
/* map type features on fields */
|
||||
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
|
||||
.entrySet()) {
|
||||
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
|
||||
}
|
||||
} finally {
|
||||
pool.releaseJCas(jcas);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
String logField = solrUIMAConfiguration.getLogField();
|
||||
if(logField == null){
|
||||
SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField();
|
||||
if(uniqueKeyField != null){
|
||||
if (logField == null) {
|
||||
SchemaField uniqueKeyField = cmd.getReq().getSchema()
|
||||
.getUniqueKeyField();
|
||||
if (uniqueKeyField != null) {
|
||||
logField = uniqueKeyField.getName();
|
||||
}
|
||||
}
|
||||
String optionalFieldInfo = logField == null ? "." :
|
||||
new StringBuilder(". ").append(logField).append("=")
|
||||
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
|
||||
.append(", ").toString();
|
||||
String optionalFieldInfo = logField == null ? "."
|
||||
: new StringBuilder(". ")
|
||||
.append(logField)
|
||||
.append("=")
|
||||
.append(
|
||||
(String) cmd.getSolrInputDocument().getField(logField)
|
||||
.getValue()).append(", ").toString();
|
||||
int len;
|
||||
String debugString;
|
||||
if (text != null && text.length() > 0) {
|
||||
len = Math.min(text.length(), 100);
|
||||
debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString();
|
||||
}
|
||||
else {
|
||||
debugString = new StringBuilder(" text=\"")
|
||||
.append(text.substring(0, len)).append("...\"").toString();
|
||||
} else {
|
||||
debugString = " null text";
|
||||
}
|
||||
if (solrUIMAConfiguration.isIgnoreErrors()) {
|
||||
log.warn("skip the text processing due to {}",new StringBuilder()
|
||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
||||
.append(debugString));
|
||||
log.warn(
|
||||
"skip the text processing due to {}",
|
||||
new StringBuilder().append(e.getLocalizedMessage())
|
||||
.append(optionalFieldInfo).append(debugString));
|
||||
} else {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
new StringBuilder("processing error ")
|
||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
||||
.append(debugString).toString(), e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder(
|
||||
"processing error ").append(e.getLocalizedMessage())
|
||||
.append(optionalFieldInfo).append(debugString).toString(), e);
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* get the texts to analyze from the corresponding fields
|
||||
*/
|
||||
|
@ -130,30 +142,31 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
if (merge) {
|
||||
StringBuilder unifiedText = new StringBuilder("");
|
||||
for (String aFieldsToAnalyze : fieldsToAnalyze) {
|
||||
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(aFieldsToAnalyze)));
|
||||
unifiedText.append(String.valueOf(solrInputDocument
|
||||
.getFieldValue(aFieldsToAnalyze)));
|
||||
}
|
||||
textVals = new String[1];
|
||||
textVals[0] = unifiedText.toString();
|
||||
} else {
|
||||
textVals = new String[fieldsToAnalyze.length];
|
||||
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
||||
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
|
||||
textVals[i] = String.valueOf(solrInputDocument
|
||||
.getFieldValue(fieldsToAnalyze[i]));
|
||||
}
|
||||
}
|
||||
return textVals;
|
||||
}
|
||||
|
||||
/* process a field value by executing UIMA on the CAS containing it as document text */
|
||||
private JCas processText(String textFieldValue) throws ResourceInitializationException,
|
||||
AnalysisEngineProcessException {
|
||||
|
||||
/*
|
||||
* process a field value executing UIMA on the JCas containing it as document
|
||||
* text
|
||||
*/
|
||||
private void processText(String textFieldValue, JCas jcas)
|
||||
throws ResourceInitializationException, AnalysisEngineProcessException {
|
||||
if (log.isDebugEnabled()) {
|
||||
log.debug("Analyzing text");
|
||||
}
|
||||
/* get the UIMA analysis engine */
|
||||
AnalysisEngine ae = aeProvider.getAE();
|
||||
|
||||
/* create a JCas which contains the text to analyze */
|
||||
JCas jcas = ae.newJCas();
|
||||
jcas.setDocumentText(textFieldValue);
|
||||
|
||||
/* perform analysis on text field */
|
||||
|
@ -161,7 +174,6 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
if (log.isDebugEnabled()) {
|
||||
log.debug("Text processing completed");
|
||||
}
|
||||
return jcas;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -17,20 +17,29 @@ package org.apache.solr.uima.processor;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.uima.ae.AEProvider;
|
||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||
import org.apache.uima.resource.ResourceInitializationException;
|
||||
import org.apache.uima.util.JCasPool;
|
||||
|
||||
/**
|
||||
* Factory for {@link UIMAUpdateRequestProcessor}
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory {
|
||||
public class UIMAUpdateRequestProcessorFactory extends
|
||||
UpdateRequestProcessorFactory {
|
||||
|
||||
private NamedList<Object> args;
|
||||
private AnalysisEngine ae;
|
||||
private JCasPool pool;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
|
@ -39,10 +48,26 @@ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFac
|
|||
}
|
||||
|
||||
/**
 * Builds a {@link UIMAUpdateRequestProcessor} for the given request.
 *
 * Reads the UIMA configuration from {@code args}; then, while holding this
 * factory's monitor, sets up the {@code ae} and {@code pool} fields if both
 * are still null; finally passes the configuration, engine and pool to the
 * new processor.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers, so the
 * method signature and the trailing constructor arguments each appear twice.
 * Presumably the first occurrence is the removed form and the second the
 * added form -- confirm against the commit.
 *
 * @param req  request; its core name is used for the AEProvider lookup and is
 *             passed to the created processor
 * @param rsp  response; not referenced in the visible body
 * @param next next processor in the chain, handed to the created processor
 * @return a new UIMAUpdateRequestProcessor
 * @throws SolrException with SERVER_ERROR, wrapping a
 *         ResourceInitializationException raised while obtaining the engine
 *         or building the pool
 */
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp, UpdateRequestProcessor next) {
|
||||
SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
|
||||
.readSolrUIMAConfiguration();
|
||||
synchronized (this) {
|
||||
// NOTE(review): with &&, a state where ae is set but pool is still null
// (e.g. if the JCasPool constructor below throws after ae was assigned)
// skips re-initialization on later calls, and a null pool is passed on.
// Consider || or assigning both fields only after both steps succeed.
if (ae == null && pool == null) {
|
||||
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
|
||||
req.getCore().getName(), configuration.getAePath(),
|
||||
configuration.getRuntimeParameters());
|
||||
try {
|
||||
ae = aeProvider.getAE();
|
||||
// 10 is JCasPool's first constructor argument -- presumably the number of
// pooled JCas instances; TODO confirm against the UIMA JCasPool docs.
pool = new JCasPool(10, ae);
|
||||
} catch (ResourceInitializationException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
|
||||
}
|
||||
}
|
||||
}
|
||||

||||
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
|
||||
new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration());
|
||||
configuration, ae, pool);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -93,7 +93,6 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testProcessing() throws Exception {
|
||||
|
||||
addDoc("uima", adoc(
|
||||
"id",
|
||||
"2312312321312",
|
||||
|
@ -185,6 +184,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Runs {@code testProcessing()} repeatedly, RANDOM_MULTIPLIER times in a row.
 *
 * RANDOM_MULTIPLIER is defined outside this view. Presumably the repetition
 * exercises reuse of the engine and JCas pool across several update
 * requests -- confirm against the commit's intent.
 *
 * @throws Exception whatever {@code testProcessing()} throws
 */
@Test
|
||||
public void testMultiplierProcessing() throws Exception {
|
||||
for (int i = 0; i < RANDOM_MULTIPLIER; i++) {
|
||||
testProcessing();
|
||||
}
|
||||
}
|
||||
|
||||
private void addDoc(String chain, String doc) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
||||
|
|
Loading…
Reference in New Issue