mirror of https://github.com/apache/lucene.git
SOLR-5201 - AnalysisEngines are now created in the factory and passed to the processors with a JCas pool
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
32803cb94f
commit
e69fb35cc2
|
@ -17,49 +17,49 @@ package org.apache.solr.uima.processor;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.SolrException.ErrorCode;
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||||
import org.apache.lucene.analysis.uima.ae.AEProvider;
|
|
||||||
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
|
||||||
import org.apache.solr.update.AddUpdateCommand;
|
import org.apache.solr.update.AddUpdateCommand;
|
||||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngine;
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
|
||||||
import org.apache.uima.jcas.JCas;
|
import org.apache.uima.jcas.JCas;
|
||||||
import org.apache.uima.resource.ResourceInitializationException;
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.JCasPool;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update document(s) to be indexed with UIMA extracted information
|
* Update document(s) to be indexed with UIMA extracted information
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||||
|
|
||||||
private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class);
|
private final Logger log = LoggerFactory
|
||||||
|
.getLogger(UIMAUpdateRequestProcessor.class);
|
||||||
|
|
||||||
SolrUIMAConfiguration solrUIMAConfiguration;
|
SolrUIMAConfiguration solrUIMAConfiguration;
|
||||||
|
|
||||||
private AEProvider aeProvider;
|
private AnalysisEngine ae;
|
||||||
|
|
||||||
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, String coreName,
|
private JCasPool pool;
|
||||||
SolrUIMAConfiguration config) {
|
|
||||||
|
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
|
||||||
|
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
|
||||||
|
JCasPool pool) {
|
||||||
super(next);
|
super(next);
|
||||||
initialize(coreName, config);
|
this.ae = ae;
|
||||||
}
|
this.pool = pool;
|
||||||
|
|
||||||
private void initialize(String coreName, SolrUIMAConfiguration config) {
|
|
||||||
solrUIMAConfiguration = config;
|
solrUIMAConfiguration = config;
|
||||||
aeProvider = AEProviderFactory.getInstance().getAEProvider(coreName,
|
|
||||||
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||||
String text = null;
|
String text = null;
|
||||||
|
@ -72,54 +72,66 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||||
for (String currentText : texts) {
|
for (String currentText : texts) {
|
||||||
text = currentText;
|
text = currentText;
|
||||||
if (text != null && text.length() > 0) {
|
if (text != null && text.length() > 0) {
|
||||||
/* process the text value */
|
/* create a JCas which contain the text to analyze */
|
||||||
JCas jcas = processText(text);
|
JCas jcas = pool.getJCas(0);
|
||||||
|
try {
|
||||||
|
/* process the text value */
|
||||||
|
processText(text, jcas);
|
||||||
|
|
||||||
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
|
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
|
||||||
/* get field mapping from config */
|
solrInputDocument, jcas);
|
||||||
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
/* get field mapping from config */
|
||||||
.getTypesFeaturesFieldsMapping();
|
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||||
/* map type features on fields */
|
.getTypesFeaturesFieldsMapping();
|
||||||
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
|
/* map type features on fields */
|
||||||
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
|
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
|
||||||
|
.entrySet()) {
|
||||||
|
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
pool.releaseJCas(jcas);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String logField = solrUIMAConfiguration.getLogField();
|
String logField = solrUIMAConfiguration.getLogField();
|
||||||
if(logField == null){
|
if (logField == null) {
|
||||||
SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField();
|
SchemaField uniqueKeyField = cmd.getReq().getSchema()
|
||||||
if(uniqueKeyField != null){
|
.getUniqueKeyField();
|
||||||
|
if (uniqueKeyField != null) {
|
||||||
logField = uniqueKeyField.getName();
|
logField = uniqueKeyField.getName();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
String optionalFieldInfo = logField == null ? "." :
|
String optionalFieldInfo = logField == null ? "."
|
||||||
new StringBuilder(". ").append(logField).append("=")
|
: new StringBuilder(". ")
|
||||||
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
|
.append(logField)
|
||||||
.append(", ").toString();
|
.append("=")
|
||||||
|
.append(
|
||||||
|
(String) cmd.getSolrInputDocument().getField(logField)
|
||||||
|
.getValue()).append(", ").toString();
|
||||||
int len;
|
int len;
|
||||||
String debugString;
|
String debugString;
|
||||||
if (text != null && text.length() > 0) {
|
if (text != null && text.length() > 0) {
|
||||||
len = Math.min(text.length(), 100);
|
len = Math.min(text.length(), 100);
|
||||||
debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString();
|
debugString = new StringBuilder(" text=\"")
|
||||||
}
|
.append(text.substring(0, len)).append("...\"").toString();
|
||||||
else {
|
} else {
|
||||||
debugString = " null text";
|
debugString = " null text";
|
||||||
}
|
}
|
||||||
if (solrUIMAConfiguration.isIgnoreErrors()) {
|
if (solrUIMAConfiguration.isIgnoreErrors()) {
|
||||||
log.warn("skip the text processing due to {}",new StringBuilder()
|
log.warn(
|
||||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
"skip the text processing due to {}",
|
||||||
.append(debugString));
|
new StringBuilder().append(e.getLocalizedMessage())
|
||||||
|
.append(optionalFieldInfo).append(debugString));
|
||||||
} else {
|
} else {
|
||||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder(
|
||||||
new StringBuilder("processing error ")
|
"processing error ").append(e.getLocalizedMessage())
|
||||||
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
|
.append(optionalFieldInfo).append(debugString).toString(), e);
|
||||||
.append(debugString).toString(), e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
super.processAdd(cmd);
|
super.processAdd(cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* get the texts to analyze from the corresponding fields
|
* get the texts to analyze from the corresponding fields
|
||||||
*/
|
*/
|
||||||
|
@ -130,30 +142,31 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||||
if (merge) {
|
if (merge) {
|
||||||
StringBuilder unifiedText = new StringBuilder("");
|
StringBuilder unifiedText = new StringBuilder("");
|
||||||
for (String aFieldsToAnalyze : fieldsToAnalyze) {
|
for (String aFieldsToAnalyze : fieldsToAnalyze) {
|
||||||
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(aFieldsToAnalyze)));
|
unifiedText.append(String.valueOf(solrInputDocument
|
||||||
|
.getFieldValue(aFieldsToAnalyze)));
|
||||||
}
|
}
|
||||||
textVals = new String[1];
|
textVals = new String[1];
|
||||||
textVals[0] = unifiedText.toString();
|
textVals[0] = unifiedText.toString();
|
||||||
} else {
|
} else {
|
||||||
textVals = new String[fieldsToAnalyze.length];
|
textVals = new String[fieldsToAnalyze.length];
|
||||||
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
for (int i = 0; i < fieldsToAnalyze.length; i++) {
|
||||||
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
|
textVals[i] = String.valueOf(solrInputDocument
|
||||||
|
.getFieldValue(fieldsToAnalyze[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return textVals;
|
return textVals;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* process a field value executing UIMA the CAS containing it as document text */
|
/*
|
||||||
private JCas processText(String textFieldValue) throws ResourceInitializationException,
|
* process a field value executing UIMA on the JCas containing it as document
|
||||||
AnalysisEngineProcessException {
|
* text
|
||||||
|
*/
|
||||||
|
private void processText(String textFieldValue, JCas jcas)
|
||||||
|
throws ResourceInitializationException, AnalysisEngineProcessException {
|
||||||
if (log.isDebugEnabled()) {
|
if (log.isDebugEnabled()) {
|
||||||
log.debug("Analyzing text");
|
log.debug("Analyzing text");
|
||||||
}
|
}
|
||||||
/* get the UIMA analysis engine */
|
|
||||||
AnalysisEngine ae = aeProvider.getAE();
|
|
||||||
|
|
||||||
/* create a JCas which contain the text to analyze */
|
|
||||||
JCas jcas = ae.newJCas();
|
|
||||||
jcas.setDocumentText(textFieldValue);
|
jcas.setDocumentText(textFieldValue);
|
||||||
|
|
||||||
/* perform analysis on text field */
|
/* perform analysis on text field */
|
||||||
|
@ -161,7 +174,6 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
||||||
if (log.isDebugEnabled()) {
|
if (log.isDebugEnabled()) {
|
||||||
log.debug("Text processing completed");
|
log.debug("Text processing completed");
|
||||||
}
|
}
|
||||||
return jcas;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,20 +17,29 @@ package org.apache.solr.uima.processor;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.uima.ae.AEProvider;
|
||||||
|
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.response.SolrQueryResponse;
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||||
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
|
||||||
|
import org.apache.uima.analysis_engine.AnalysisEngine;
|
||||||
|
import org.apache.uima.resource.ResourceInitializationException;
|
||||||
|
import org.apache.uima.util.JCasPool;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory for {@link UIMAUpdateRequestProcessor}
|
* Factory for {@link UIMAUpdateRequestProcessor}
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory {
|
public class UIMAUpdateRequestProcessorFactory extends
|
||||||
|
UpdateRequestProcessorFactory {
|
||||||
|
|
||||||
private NamedList<Object> args;
|
private NamedList<Object> args;
|
||||||
|
private AnalysisEngine ae;
|
||||||
|
private JCasPool pool;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
|
@ -39,10 +48,26 @@ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFac
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp,
|
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||||
UpdateRequestProcessor next) {
|
SolrQueryResponse rsp, UpdateRequestProcessor next) {
|
||||||
|
SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args)
|
||||||
|
.readSolrUIMAConfiguration();
|
||||||
|
synchronized (this) {
|
||||||
|
if (ae == null && pool == null) {
|
||||||
|
AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider(
|
||||||
|
req.getCore().getName(), configuration.getAePath(),
|
||||||
|
configuration.getRuntimeParameters());
|
||||||
|
try {
|
||||||
|
ae = aeProvider.getAE();
|
||||||
|
pool = new JCasPool(10, ae);
|
||||||
|
} catch (ResourceInitializationException e) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
|
return new UIMAUpdateRequestProcessor(next, req.getCore().getName(),
|
||||||
new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration());
|
configuration, ae, pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,7 +93,6 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testProcessing() throws Exception {
|
public void testProcessing() throws Exception {
|
||||||
|
|
||||||
addDoc("uima", adoc(
|
addDoc("uima", adoc(
|
||||||
"id",
|
"id",
|
||||||
"2312312321312",
|
"2312312321312",
|
||||||
|
@ -185,6 +184,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiplierProcessing() throws Exception {
|
||||||
|
for (int i = 0; i < RANDOM_MULTIPLIER; i++) {
|
||||||
|
testProcessing();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void addDoc(String chain, String doc) throws Exception {
|
private void addDoc(String chain, String doc) throws Exception {
|
||||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
||||||
|
|
Loading…
Reference in New Issue