SOLR-2503: feature value map to dynamicField

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1101047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-05-09 14:56:01 +00:00
parent 1d842018fc
commit b30e4e4ec8
12 changed files with 182 additions and 24 deletions

View File

@ -28,6 +28,11 @@ Upgrading from Solr 3.1
It should move to UIMAUpdateRequestProcessorFactory setting.
See contrib/uima/README.txt for more details. (SOLR-2436)
New Features
----------------------
* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
Test Cases:
----------------------

View File

@ -30,14 +30,14 @@ public class SolrUIMAConfiguration {
private boolean fieldsMerging;
private Map<String, Map<String, String>> typesFeaturesFieldsMapping;
private Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;
private String aePath;
private Map<String, Object> runtimeParameters;
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
Map<String, Map<String, String>> typesFeaturesFieldsMapping,
Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
Map<String, Object> runtimeParameters) {
this.aePath = aePath;
this.fieldsToAnalyze = fieldsToAnalyze;
@ -54,7 +54,7 @@ public class SolrUIMAConfiguration {
return fieldsMerging;
}
public Map<String, Map<String, String>> getTypesFeaturesFieldsMapping() {
public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
return typesFeaturesFieldsMapping;
}
@ -65,4 +65,39 @@ public class SolrUIMAConfiguration {
/**
 * Returns the runtime parameters map held by this configuration.
 *
 * @return the same map instance stored in the {@code runtimeParameters} field (no copy is made)
 */
public Map<String, Object> getRuntimeParameters() {
  return this.runtimeParameters;
}
/**
 * Target Solr field of a single feature mapping.
 *
 * <p>Two forms are supported:
 * <ul>
 *   <li><b>static field</b> - {@code fieldNameFeature} is {@code null} and {@code fieldName}
 *       is used verbatim;</li>
 *   <li><b>dynamicField</b> - {@code fieldNameFeature} is set and {@code fieldName} is a
 *       pattern that starts or ends with {@code '*'} (e.g. {@code *_sm} or {@code sm_*});
 *       the {@code '*'} is replaced by the value passed to {@link #getFieldName(String)}.</li>
 * </ul>
 *
 * <p>Instances are immutable.
 */
static final class MapField {

  /** Static field name, or the fixed part of the dynamicField pattern with the '*' removed. */
  private final String fieldName;

  /** Feature whose value completes a dynamic field name; {@code null} for a static field. */
  private final String fieldNameFeature;

  /** Only meaningful for a dynamicField: {@code false} for "*_s", {@code true} for "s_*". */
  private final boolean prefix;

  /**
   * @param fieldName static field name, or a dynamicField pattern when
   *        {@code fieldNameFeature} is not {@code null}
   * @param fieldNameFeature name of the feature that supplies the variable part of the
   *        dynamic field name, or {@code null} for a static field
   * @throws NullPointerException if {@code fieldNameFeature} is set but {@code fieldName}
   *         is {@code null}
   * @throws RuntimeException if {@code fieldNameFeature} is set but {@code fieldName}
   *         neither starts nor ends with {@code '*'}
   */
  MapField(String fieldName, String fieldNameFeature) {
    this.fieldNameFeature = fieldNameFeature;
    if (fieldNameFeature == null) {
      // Static mapping: the name is taken as-is, no pattern handling.
      this.fieldName = fieldName;
      this.prefix = false;
    } else if (fieldName == null) {
      // Same exception type as the implicit dereference failure, but with a usable message.
      throw new NullPointerException(
          "dynamicField pattern is null for fieldNameFeature " + fieldNameFeature);
    } else if (fieldName.startsWith("*")) {
      // "*_s": the feature value goes in front of the remaining suffix.
      this.prefix = false;
      this.fieldName = fieldName.substring(1);
    } else if (fieldName.endsWith("*")) {
      // "s_*": the remaining text is a prefix and the feature value follows it.
      this.prefix = true;
      this.fieldName = fieldName.substring(0, fieldName.length() - 1);
    } else {
      throw new RuntimeException(
          "static field name cannot be used for dynamicField: " + fieldName);
    }
  }

  /**
   * @return the name of the feature that supplies the variable part of the field name,
   *         or {@code null} when this is a static field mapping
   */
  String getFieldNameFeature() {
    return fieldNameFeature;
  }

  /**
   * Resolves the concrete Solr field name.
   *
   * @param featureValue value substituted for the {@code '*'} of a dynamicField pattern;
   *        ignored for a static field. Note that a {@code null} value is concatenated as
   *        the text {@code "null"}.
   * @return the static field name, or the dynamic field name built from
   *         {@code featureValue}
   */
  String getFieldName(String featureValue) {
    if (fieldNameFeature == null) {
      return fieldName;
    }
    return prefix ? fieldName + featureValue : featureValue + fieldName;
  }
}
}

View File

@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
/**
* Read configuration for Solr-UIMA integration
@ -62,8 +63,8 @@ public class SolrUIMAConfigurationReader {
}
@SuppressWarnings("rawtypes")
private Map<String, Map<String, String>> readTypesFeaturesFieldsMapping() {
Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
Map<String, Map<String, MapField>> map = new HashMap<String, Map<String, MapField>>();
NamedList fieldMappings = (NamedList) args.get("fieldMappings");
/* iterate over UIMA types */
@ -71,13 +72,21 @@ public class SolrUIMAConfigurationReader {
NamedList type = (NamedList) fieldMappings.get("type", i);
String typeName = (String)type.get("name");
Map<String, String> subMap = new HashMap<String, String>();
Map<String, MapField> subMap = new HashMap<String, MapField>();
/* iterate over mapping definitions */
for(int j = 0; j < type.size() - 1; j++){
NamedList mapping = (NamedList) type.get("mapping", j + 1);
String featureName = (String) mapping.get("feature");
String fieldNameFeature = null;
String mappedFieldName = (String) mapping.get("field");
subMap.put(featureName, mappedFieldName);
if(mappedFieldName == null){
fieldNameFeature = (String) mapping.get("fieldNameFeature");
mappedFieldName = (String) mapping.get("dynamicField");
}
if(mappedFieldName == null)
throw new RuntimeException("either of field or dynamicField should be defined for feature " + featureName);
MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
subMap.put(featureName, mapField);
}
map.put(typeName, subMap);
}

View File

@ -20,6 +20,7 @@ package org.apache.solr.uima.processor;
import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
@ -53,7 +54,7 @@ public class UIMAToSolrMapper {
* name of UIMA type to map
* @param featureFieldsmapping
*/
public void map(String typeName, Map<String, String> featureFieldsmapping) {
public void map(String typeName, Map<String, MapField> featureFieldsmapping) {
try {
FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor(
JCas.class).newInstance(cas);
@ -62,7 +63,11 @@ public class UIMAToSolrMapper {
.hasNext();) {
FeatureStructure fs = iterator.next();
for (String featureName : featureFieldsmapping.keySet()) {
String fieldName = featureFieldsmapping.get(featureName);
MapField mapField = featureFieldsmapping.get(featureName);
String fieldNameFeature = mapField.getFieldNameFeature();
String fieldNameFeatureValue = fieldNameFeature == null ? null :
fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
String fieldName = mapField.getFieldName(fieldNameFeatureValue);
log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName)
.append(" to ").append(fieldName).toString());
String featureValue = null;

View File

@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.uima.processor.ae.AEProvider;
import org.apache.solr.uima.processor.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand;
@ -69,7 +70,7 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */
Map<String, Map<String, String>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {

View File

@ -33,6 +33,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.junit.Before;
@ -82,11 +83,11 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
UpdateRequestProcessor processor = factory.getInstance(req(), null, null);
assertTrue(processor instanceof UIMAUpdateRequestProcessor);
SolrUIMAConfiguration conf = ((UIMAUpdateRequestProcessor)processor).solrUIMAConfiguration;
Map<String, Map<String, String>> map = conf.getTypesFeaturesFieldsMapping();
Map<String, String> subMap = map.get("a-type-which-can-have-multiple-features");
Map<String, Map<String, MapField>> map = conf.getTypesFeaturesFieldsMapping();
Map<String, MapField> subMap = map.get("a-type-which-can-have-multiple-features");
assertEquals(2, subMap.size());
assertEquals("1", subMap.get("A"));
assertEquals("2", subMap.get("B"));
assertEquals("1", subMap.get("A").getFieldName(null));
assertEquals("2", subMap.get("B").getFieldName(null));
}
@Test
@ -104,7 +105,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']");
assertQ(req("sentiment:*"), "//*[@numFound='0']");
assertQ(req("entity:Prague"), "//*[@numFound='1']");
assertQ(req("OTHER_sm:Prague"), "//*[@numFound='1']");
}
@Test
@ -124,7 +125,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertQ(req("sentence:*"), "//*[@numFound='2']");
assertQ(req("sentiment:positive"), "//*[@numFound='1']");
assertQ(req("entity:Apache"), "//*[@numFound='2']");
assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
}
private void addDoc(String doc) throws Exception {

View File

@ -34,6 +34,12 @@ public class DummyEntityAnnotator extends JCasAnnotator_ImplBase{
EntityAnnotation entityAnnotation = new EntityAnnotation(jcas);
entityAnnotation.setBegin(annotation.getBegin());
entityAnnotation.setEnd(annotation.getEnd());
String entityString = annotation.getCoveredText();
entityAnnotation.setEntity(entityString);
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
if(entityString.equals("Apache"))
name = "ORGANIZATION";
entityAnnotation.setName(name);
entityAnnotation.addToIndexes();
}
}

View File

@ -1,6 +1,6 @@
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas;
@ -11,8 +11,8 @@ import org.apache.uima.jcas.tcas.Annotation;
/**
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
* XML source: /Users/tommasoteofili/Documents/workspaces/lucene_workspace/lucene_dev/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
* Updated by JCasGen Sat May 07 22:33:38 JST 2011
* XML source: /Users/koji/Documents/workspace/DummyEntityAnnotator/desc/DummyEntityAEDescriptor.xml
* @generated */
public class EntityAnnotation extends Annotation {
/** @generated
@ -57,6 +57,42 @@ public class EntityAnnotation extends Annotation {
@generated modifiable */
private void readObject() {}
}
//*--------------*
//* Feature: name
/** Reads the {@code name} feature of this annotation as a String.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_name} is null.
 * @generated */
public String getName() {
  final EntityAnnotation_Type entityType = (EntityAnnotation_Type) jcasType;
  if (EntityAnnotation_Type.featOkTst && entityType.casFeat_name == null) {
    jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  return jcasType.ll_cas.ll_getStringValue(addr, entityType.casFeatCode_name);
}
/** Writes {@code v} into the {@code name} feature of this annotation.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_name} is null.
 * @generated */
public void setName(String v) {
  final EntityAnnotation_Type entityType = (EntityAnnotation_Type) jcasType;
  if (EntityAnnotation_Type.featOkTst && entityType.casFeat_name == null) {
    jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  jcasType.ll_cas.ll_setStringValue(addr, entityType.casFeatCode_name, v);
}
//*--------------*
//* Feature: entity
/** Reads the {@code entity} feature of this annotation as a String.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_entity} is null.
 * @generated */
public String getEntity() {
  final EntityAnnotation_Type entityType = (EntityAnnotation_Type) jcasType;
  if (EntityAnnotation_Type.featOkTst && entityType.casFeat_entity == null) {
    jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  return jcasType.ll_cas.ll_getStringValue(addr, entityType.casFeatCode_entity);
}
/** Writes {@code v} into the {@code entity} feature of this annotation.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_entity} is null.
 * @generated */
public void setEntity(String v) {
  final EntityAnnotation_Type entityType = (EntityAnnotation_Type) jcasType;
  if (EntityAnnotation_Type.featOkTst && entityType.casFeat_entity == null) {
    jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  jcasType.ll_cas.ll_setStringValue(addr, entityType.casFeatCode_entity, v);
}
}

View File

@ -1,5 +1,5 @@
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas;
@ -9,10 +9,12 @@ import org.apache.uima.cas.impl.FSGenerator;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.impl.TypeImpl;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.FeatureImpl;
import org.apache.uima.cas.Feature;
import org.apache.uima.jcas.tcas.Annotation_Type;
/**
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
* Updated by JCasGen Sat May 07 22:33:38 JST 2011
* @generated */
public class EntityAnnotation_Type extends Annotation_Type {
/** @generated */
@ -38,6 +40,42 @@ public class EntityAnnotation_Type extends Annotation_Type {
/** @generated
@modifiable */
public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.solr.uima.ts.EntityAnnotation");
/** @generated */
final Feature casFeat_name;
/** @generated */
final int casFeatCode_name;
/** Low-level read of the {@code name} feature for the feature structure at {@code addr}.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_name} is null.
 * @generated */
public String getName(int addr) {
  final boolean featureMissing = featOkTst && casFeat_name == null;
  if (featureMissing) {
    jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  return ll_cas.ll_getStringValue(addr, casFeatCode_name);
}
/** Low-level write of {@code v} into the {@code name} feature for the feature structure
 * at {@code addr}. Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_name} is null.
 * @generated */
public void setName(int addr, String v) {
  final boolean featureMissing = featOkTst && casFeat_name == null;
  if (featureMissing) {
    jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  ll_cas.ll_setStringValue(addr, casFeatCode_name, v);
}
/** @generated */
final Feature casFeat_entity;
/** @generated */
final int casFeatCode_entity;
/** Low-level read of the {@code entity} feature for the feature structure at {@code addr}.
 * Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_entity} is null.
 * @generated */
public String getEntity(int addr) {
  final boolean featureMissing = featOkTst && casFeat_entity == null;
  if (featureMissing) {
    jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  return ll_cas.ll_getStringValue(addr, casFeatCode_entity);
}
/** Low-level write of {@code v} into the {@code entity} feature for the feature structure
 * at {@code addr}. Calls {@code throwFeatMissing} first when {@code featOkTst} is set and
 * {@code casFeat_entity} is null.
 * @generated */
public void setEntity(int addr, String v) {
  final boolean featureMissing = featOkTst && casFeat_entity == null;
  if (featureMissing) {
    jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
  }
  ll_cas.ll_setStringValue(addr, casFeatCode_entity, v);
}
@ -47,6 +85,14 @@ public class EntityAnnotation_Type extends Annotation_Type {
super(jcas, casType);
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
casFeat_name = jcas.getRequiredFeatureDE(casType, "name", "uima.cas.String", featOkTst);
casFeatCode_name = (null == casFeat_name) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_name).getCode();
casFeat_entity = jcas.getRequiredFeatureDE(casType, "entity", "uima.cas.String", featOkTst);
casFeatCode_entity = (null == casFeat_entity) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_entity).getCode();
}
}

View File

@ -32,6 +32,18 @@
<name>org.apache.solr.uima.ts.EntityAnnotation</name>
<description/>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>name</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
<featureDescription>
<name>entity</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
</features>
</typeDescription>
</types>
</typeSystemDescription>

View File

@ -597,6 +597,7 @@
stored="true" multiValued="true"/> <dynamicField name="random_*"
type="random" />
-->
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
<!--
uncomment the following to ignore any fields that don't already
match an existing field name or dynamic field, rather than

View File

@ -995,8 +995,9 @@
<lst name="type">
<str name="name">org.apache.solr.uima.ts.EntityAnnotation</str>
<lst name="mapping">
<str name="feature">coveredText</str>
<str name="field">entity</str>
<str name="feature">entity</str>
<str name="fieldNameFeature">name</str>
<str name="dynamicField">*_sm</str>
</lst>
</lst>
</lst>