From e69fb35cc24ff819954c313fcae5daadbd7754ac Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Thu, 5 Sep 2013 07:12:10 +0000 Subject: [PATCH] SOLR-5201 - AnalysisEngines are now created in the factory and passed to the processors with a JCas pool git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1520239 13f79535-47bb-0310-9956-ffa450edef68 --- .../processor/UIMAUpdateRequestProcessor.java | 132 ++++++++++-------- .../UIMAUpdateRequestProcessorFactory.java | 37 ++++- .../UIMAUpdateRequestProcessorTest.java | 8 +- 3 files changed, 110 insertions(+), 67 deletions(-) diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java index 4f57b862686..377f7216fbe 100644 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java +++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java @@ -17,49 +17,49 @@ package org.apache.solr.uima.processor; * limitations under the License. */ +import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; + import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.schema.SchemaField; import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField; -import org.apache.lucene.analysis.uima.ae.AEProvider; -import org.apache.lucene.analysis.uima.ae.AEProviderFactory; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.JCasPool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.Map; - /** * Update document(s) to be indexed with UIMA extracted information - * + * */ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { - - private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class); - + + private final Logger log = LoggerFactory + .getLogger(UIMAUpdateRequestProcessor.class); + SolrUIMAConfiguration solrUIMAConfiguration; - - private AEProvider aeProvider; - - public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, String coreName, - SolrUIMAConfiguration config) { + + private AnalysisEngine ae; + + private JCasPool pool; + + public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, + String coreName, SolrUIMAConfiguration config, AnalysisEngine ae, + JCasPool pool) { super(next); - initialize(coreName, config); - } - - private void initialize(String coreName, SolrUIMAConfiguration config) { + this.ae = ae; + this.pool = pool; solrUIMAConfiguration = config; - aeProvider = AEProviderFactory.getInstance().getAEProvider(coreName, - solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters()); } - + @Override public void processAdd(AddUpdateCommand cmd) throws IOException { String text = null; @@ -72,54 +72,66 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { for (String currentText : texts) { text = currentText; if (text != null && text.length() > 0) { - /* process the text value */ - JCas jcas = processText(text); + /* create a JCas which contain the text to analyze */ + JCas jcas = pool.getJCas(0); + try { + /* process the text value */ + processText(text, jcas); - UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas); - /* get field mapping from config */ - Map> typesAndFeaturesFieldsMap = solrUIMAConfiguration - .getTypesFeaturesFieldsMapping(); - /* map type features on fields */ - for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) { - uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN)); + UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper( + solrInputDocument, jcas); + /* get field mapping from config */ + Map> typesAndFeaturesFieldsMap = solrUIMAConfiguration + .getTypesFeaturesFieldsMapping(); + /* map type features on fields */ + for (Entry> entry : typesAndFeaturesFieldsMap + .entrySet()) { + uimaToSolrMapper.map(entry.getKey(), entry.getValue()); + } + } finally { + pool.releaseJCas(jcas); } } } } catch (Exception e) { String logField = solrUIMAConfiguration.getLogField(); - if(logField == null){ - SchemaField uniqueKeyField = cmd.getReq().getSchema().getUniqueKeyField(); - if(uniqueKeyField != null){ + if (logField == null) { + SchemaField uniqueKeyField = cmd.getReq().getSchema() + .getUniqueKeyField(); + if (uniqueKeyField != null) { logField = uniqueKeyField.getName(); } } - String optionalFieldInfo = logField == null ? "." : - new StringBuilder(". ").append(logField).append("=") - .append((String)cmd.getSolrInputDocument().getField(logField).getValue()) - .append(", ").toString(); + String optionalFieldInfo = logField == null ? "." + : new StringBuilder(". ") + .append(logField) + .append("=") + .append( + (String) cmd.getSolrInputDocument().getField(logField) + .getValue()).append(", ").toString(); int len; String debugString; if (text != null && text.length() > 0) { len = Math.min(text.length(), 100); - debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString(); - } - else { + debugString = new StringBuilder(" text=\"") + .append(text.substring(0, len)).append("...\"").toString(); + } else { debugString = " null text"; } if (solrUIMAConfiguration.isIgnoreErrors()) { - log.warn("skip the text processing due to {}",new StringBuilder() - .append(e.getLocalizedMessage()).append(optionalFieldInfo) - .append(debugString)); + log.warn( + "skip the text processing due to {}", + new StringBuilder().append(e.getLocalizedMessage()) + .append(optionalFieldInfo).append(debugString)); } else { - throw new SolrException(ErrorCode.SERVER_ERROR, - new StringBuilder("processing error ") - .append(e.getLocalizedMessage()).append(optionalFieldInfo) - .append(debugString).toString(), e); + throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder( + "processing error ").append(e.getLocalizedMessage()) + .append(optionalFieldInfo).append(debugString).toString(), e); } } super.processAdd(cmd); } - + /* * get the texts to analyze from the corresponding fields */ @@ -130,30 +142,31 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { if (merge) { StringBuilder unifiedText = new StringBuilder(""); for (String aFieldsToAnalyze : fieldsToAnalyze) { - unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(aFieldsToAnalyze))); + unifiedText.append(String.valueOf(solrInputDocument + .getFieldValue(aFieldsToAnalyze))); } textVals = new String[1]; textVals[0] = unifiedText.toString(); } else { textVals = new String[fieldsToAnalyze.length]; for (int i = 0; i < fieldsToAnalyze.length; i++) { - textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])); + textVals[i] = String.valueOf(solrInputDocument + .getFieldValue(fieldsToAnalyze[i])); } } return textVals; } - - /* process a field value executing UIMA the CAS containing it as document text */ - private JCas processText(String textFieldValue) throws ResourceInitializationException, - AnalysisEngineProcessException { + + /* + * process a field value executing UIMA on the JCas containing it as document + * text + */ + private void processText(String textFieldValue, JCas jcas) + throws ResourceInitializationException, AnalysisEngineProcessException { if (log.isDebugEnabled()) { log.debug("Analyzing text"); } - /* get the UIMA analysis engine */ - AnalysisEngine ae = aeProvider.getAE(); - /* create a JCas which contain the text to analyze */ - JCas jcas = ae.newJCas(); jcas.setDocumentText(textFieldValue); /* perform analysis on text field */ @@ -161,7 +174,6 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor { if (log.isDebugEnabled()) { log.debug("Text processing completed"); } - return jcas; } - + } diff --git a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java index c088df8ff89..2f89dc554c1 100644 --- a/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java +++ b/solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.java @@ -17,20 +17,29 @@ package org.apache.solr.uima.processor; * limitations under the License. */ +import org.apache.lucene.analysis.uima.ae.AEProvider; +import org.apache.lucene.analysis.uima.ae.AEProviderFactory; +import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.JCasPool; /** * Factory for {@link UIMAUpdateRequestProcessor} * - * + * */ -public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory { +public class UIMAUpdateRequestProcessorFactory extends + UpdateRequestProcessorFactory { private NamedList args; + private AnalysisEngine ae; + private JCasPool pool; @SuppressWarnings("unchecked") @Override @@ -39,10 +48,26 @@ public class UIMAUpdateRequestProcessorFactory extends UpdateRequestProcessorFac } @Override - public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, - UpdateRequestProcessor next) { + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, UpdateRequestProcessor next) { + SolrUIMAConfiguration configuration = new SolrUIMAConfigurationReader(args) + .readSolrUIMAConfiguration(); + synchronized (this) { + if (ae == null && pool == null) { + AEProvider aeProvider = AEProviderFactory.getInstance().getAEProvider( + req.getCore().getName(), configuration.getAePath(), + configuration.getRuntimeParameters()); + try { + ae = aeProvider.getAE(); + pool = new JCasPool(10, ae); + } catch (ResourceInitializationException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + } + } + return new UIMAUpdateRequestProcessor(next, req.getCore().getName(), - new SolrUIMAConfigurationReader(args).readSolrUIMAConfiguration()); + configuration, ae, pool); } - + } diff --git a/solr/contrib/uima/src/test/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java b/solr/contrib/uima/src/test/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java index d5b5ddfc906..3f4a8f2da19 100644 --- a/solr/contrib/uima/src/test/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java +++ b/solr/contrib/uima/src/test/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java @@ -93,7 +93,6 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { @Test public void testProcessing() throws Exception { - addDoc("uima", adoc( "id", "2312312321312", @@ -185,6 +184,13 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 { } } + @Test + public void testMultiplierProcessing() throws Exception { + for (int i = 0; i < RANDOM_MULTIPLIER; i++) { + testProcessing(); + } + } + private void addDoc(String chain, String doc) throws Exception { Map params = new HashMap(); params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });