SOLR-2503: feature value map to dynamicField

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1101047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-05-09 14:56:01 +00:00
parent 1d842018fc
commit b30e4e4ec8
12 changed files with 182 additions and 24 deletions

View File

@ -28,6 +28,11 @@ Upgrading from Solr 3.1
It should move to UIMAUpdateRequestProcessorFactory setting. It should move to UIMAUpdateRequestProcessorFactory setting.
See contrib/uima/README.txt for more details. (SOLR-2436) See contrib/uima/README.txt for more details. (SOLR-2436)
New Features
----------------------
* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
Test Cases: Test Cases:
---------------------- ----------------------

View File

@ -30,14 +30,14 @@ public class SolrUIMAConfiguration {
private boolean fieldsMerging; private boolean fieldsMerging;
private Map<String, Map<String, String>> typesFeaturesFieldsMapping; private Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;
private String aePath; private String aePath;
private Map<String, Object> runtimeParameters; private Map<String, Object> runtimeParameters;
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging, public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
Map<String, Map<String, String>> typesFeaturesFieldsMapping, Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
Map<String, Object> runtimeParameters) { Map<String, Object> runtimeParameters) {
this.aePath = aePath; this.aePath = aePath;
this.fieldsToAnalyze = fieldsToAnalyze; this.fieldsToAnalyze = fieldsToAnalyze;
@ -54,7 +54,7 @@ public class SolrUIMAConfiguration {
return fieldsMerging; return fieldsMerging;
} }
public Map<String, Map<String, String>> getTypesFeaturesFieldsMapping() { public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
return typesFeaturesFieldsMapping; return typesFeaturesFieldsMapping;
} }
@ -65,4 +65,39 @@ public class SolrUIMAConfiguration {
public Map<String, Object> getRuntimeParameters() { public Map<String, Object> getRuntimeParameters() {
return runtimeParameters; return runtimeParameters;
} }
/**
 * Target Solr field of one feature mapping.
 *
 * <p>A mapping is either <em>static</em> (the feature value always goes to one
 * fixed field) or <em>dynamic</em> (the field name is assembled from a
 * dynamicField pattern such as {@code *_sm} or {@code sm_*} plus the value of
 * another feature, named by {@code fieldNameFeature}).
 *
 * <p>Instances are immutable once constructed.
 */
static final class MapField {

  /** Static field name, or the fixed part of the dynamicField pattern with its '*' removed. */
  private final String fieldName;

  /** Feature whose value completes a dynamic field name; {@code null} for a static mapping. */
  private final String fieldNameFeature;

  /** Only meaningful for dynamic mappings: {@code true} for "s_*" patterns, {@code false} for "*_s". */
  private final boolean prefix;

  /**
   * @param fieldName static field name when {@code fieldNameFeature} is null,
   *        otherwise a dynamicField pattern that starts or ends with '*'
   * @param fieldNameFeature name of the feature supplying the variable part of
   *        the field name, or {@code null} for a static mapping
   * @throws IllegalArgumentException if a dynamic mapping is requested but
   *         {@code fieldName} is null or contains no leading/trailing '*'
   */
  MapField(String fieldName, String fieldNameFeature) {
    this.fieldNameFeature = fieldNameFeature;
    if (fieldNameFeature == null) {
      // static mapping: the name is used verbatim
      this.fieldName = fieldName;
      this.prefix = false;
    } else if (fieldName == null) {
      // previously surfaced as a bare NullPointerException from startsWith()
      throw new IllegalArgumentException(
          "dynamicField pattern must be defined when fieldNameFeature is used: " + fieldNameFeature);
    } else if (fieldName.startsWith("*")) {
      // "*_s": the feature value comes first, the fixed part is a suffix
      this.prefix = false;
      this.fieldName = fieldName.substring(1);
    } else if (fieldName.endsWith("*")) {
      // "s_*": the fixed part is a prefix, the feature value follows
      this.prefix = true;
      this.fieldName = fieldName.substring(0, fieldName.length() - 1);
    } else {
      throw new IllegalArgumentException(
          "static field name cannot be used for dynamicField: " + fieldName);
    }
  }

  /**
   * @return the name of the feature whose value completes the field name, or
   *         {@code null} when this is a static mapping
   */
  String getFieldNameFeature() {
    return fieldNameFeature;
  }

  /**
   * Resolves the concrete Solr field name.
   *
   * @param featureValue value of the {@code fieldNameFeature} feature; ignored
   *        for static mappings. For a dynamic mapping a {@code null} value is
   *        concatenated as the text "null".
   * @return the static field name, or the pattern's fixed part combined with
   *         {@code featureValue}
   */
  String getFieldName(String featureValue) {
    if (fieldNameFeature == null) {
      return fieldName;
    }
    return prefix ? fieldName + featureValue : featureValue + fieldName;
  }
}
} }

View File

@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
/** /**
* Read configuration for Solr-UIMA integration * Read configuration for Solr-UIMA integration
@ -62,8 +63,8 @@ public class SolrUIMAConfigurationReader {
} }
@SuppressWarnings("rawtypes") @SuppressWarnings("rawtypes")
private Map<String, Map<String, String>> readTypesFeaturesFieldsMapping() { private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>(); Map<String, Map<String, MapField>> map = new HashMap<String, Map<String, MapField>>();
NamedList fieldMappings = (NamedList) args.get("fieldMappings"); NamedList fieldMappings = (NamedList) args.get("fieldMappings");
/* iterate over UIMA types */ /* iterate over UIMA types */
@ -71,13 +72,21 @@ public class SolrUIMAConfigurationReader {
NamedList type = (NamedList) fieldMappings.get("type", i); NamedList type = (NamedList) fieldMappings.get("type", i);
String typeName = (String)type.get("name"); String typeName = (String)type.get("name");
Map<String, String> subMap = new HashMap<String, String>(); Map<String, MapField> subMap = new HashMap<String, MapField>();
/* iterate over mapping definitions */ /* iterate over mapping definitions */
for(int j = 0; j < type.size() - 1; j++){ for(int j = 0; j < type.size() - 1; j++){
NamedList mapping = (NamedList) type.get("mapping", j + 1); NamedList mapping = (NamedList) type.get("mapping", j + 1);
String featureName = (String) mapping.get("feature"); String featureName = (String) mapping.get("feature");
String fieldNameFeature = null;
String mappedFieldName = (String) mapping.get("field"); String mappedFieldName = (String) mapping.get("field");
subMap.put(featureName, mappedFieldName); if(mappedFieldName == null){
fieldNameFeature = (String) mapping.get("fieldNameFeature");
mappedFieldName = (String) mapping.get("dynamicField");
}
if(mappedFieldName == null)
throw new RuntimeException("either of field or dynamicField should be defined for feature " + featureName);
MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
subMap.put(featureName, mapField);
} }
map.put(typeName, subMap); map.put(typeName, subMap);
} }

View File

@ -20,6 +20,7 @@ package org.apache.solr.uima.processor;
import java.util.Map; import java.util.Map;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type; import org.apache.uima.cas.Type;
@ -53,7 +54,7 @@ public class UIMAToSolrMapper {
* name of UIMA type to map * name of UIMA type to map
* @param featureFieldsmapping * @param featureFieldsmapping
*/ */
public void map(String typeName, Map<String, String> featureFieldsmapping) { public void map(String typeName, Map<String, MapField> featureFieldsmapping) {
try { try {
FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor( FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor(
JCas.class).newInstance(cas); JCas.class).newInstance(cas);
@ -62,7 +63,11 @@ public class UIMAToSolrMapper {
.hasNext();) { .hasNext();) {
FeatureStructure fs = iterator.next(); FeatureStructure fs = iterator.next();
for (String featureName : featureFieldsmapping.keySet()) { for (String featureName : featureFieldsmapping.keySet()) {
String fieldName = featureFieldsmapping.get(featureName); MapField mapField = featureFieldsmapping.get(featureName);
String fieldNameFeature = mapField.getFieldNameFeature();
String fieldNameFeatureValue = fieldNameFeature == null ? null :
fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
String fieldName = mapField.getFieldName(fieldNameFeatureValue);
log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName) log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName)
.append(" to ").append(fieldName).toString()); .append(" to ").append(fieldName).toString());
String featureValue = null; String featureValue = null;

View File

@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.uima.processor.ae.AEProvider; import org.apache.solr.uima.processor.ae.AEProvider;
import org.apache.solr.uima.processor.ae.AEProviderFactory; import org.apache.solr.uima.processor.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.AddUpdateCommand;
@ -69,7 +70,7 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas); UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */ /* get field mapping from config */
Map<String, Map<String, String>> typesAndFeaturesFieldsMap = solrUIMAConfiguration Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping(); .getTypesFeaturesFieldsMapping();
/* map type features on fields */ /* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) { for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {

View File

@ -33,6 +33,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.XmlUpdateRequestHandler; import org.apache.solr.handler.XmlUpdateRequestHandler;
import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain; import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.junit.Before; import org.junit.Before;
@ -82,11 +83,11 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
UpdateRequestProcessor processor = factory.getInstance(req(), null, null); UpdateRequestProcessor processor = factory.getInstance(req(), null, null);
assertTrue(processor instanceof UIMAUpdateRequestProcessor); assertTrue(processor instanceof UIMAUpdateRequestProcessor);
SolrUIMAConfiguration conf = ((UIMAUpdateRequestProcessor)processor).solrUIMAConfiguration; SolrUIMAConfiguration conf = ((UIMAUpdateRequestProcessor)processor).solrUIMAConfiguration;
Map<String, Map<String, String>> map = conf.getTypesFeaturesFieldsMapping(); Map<String, Map<String, MapField>> map = conf.getTypesFeaturesFieldsMapping();
Map<String, String> subMap = map.get("a-type-which-can-have-multiple-features"); Map<String, MapField> subMap = map.get("a-type-which-can-have-multiple-features");
assertEquals(2, subMap.size()); assertEquals(2, subMap.size());
assertEquals("1", subMap.get("A")); assertEquals("1", subMap.get("A").getFieldName(null));
assertEquals("2", subMap.get("B")); assertEquals("2", subMap.get("B").getFieldName(null));
} }
@Test @Test
@ -104,7 +105,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertU(commit()); assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']"); assertQ(req("sentence:*"), "//*[@numFound='1']");
assertQ(req("sentiment:*"), "//*[@numFound='0']"); assertQ(req("sentiment:*"), "//*[@numFound='0']");
assertQ(req("entity:Prague"), "//*[@numFound='1']"); assertQ(req("OTHER_sm:Prague"), "//*[@numFound='1']");
} }
@Test @Test
@ -124,7 +125,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
assertQ(req("sentence:*"), "//*[@numFound='2']"); assertQ(req("sentence:*"), "//*[@numFound='2']");
assertQ(req("sentiment:positive"), "//*[@numFound='1']"); assertQ(req("sentiment:positive"), "//*[@numFound='1']");
assertQ(req("entity:Apache"), "//*[@numFound='2']"); assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
} }
private void addDoc(String doc) throws Exception { private void addDoc(String doc) throws Exception {

View File

@ -34,6 +34,12 @@ public class DummyEntityAnnotator extends JCasAnnotator_ImplBase{
EntityAnnotation entityAnnotation = new EntityAnnotation(jcas); EntityAnnotation entityAnnotation = new EntityAnnotation(jcas);
entityAnnotation.setBegin(annotation.getBegin()); entityAnnotation.setBegin(annotation.getBegin());
entityAnnotation.setEnd(annotation.getEnd()); entityAnnotation.setEnd(annotation.getEnd());
String entityString = annotation.getCoveredText();
entityAnnotation.setEntity(entityString);
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
if(entityString.equals("Apache"))
name = "ORGANIZATION";
entityAnnotation.setName(name);
entityAnnotation.addToIndexes(); entityAnnotation.addToIndexes();
} }
} }

View File

@ -1,6 +1,6 @@
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */ /* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts; package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.JCas;
@ -11,8 +11,8 @@ import org.apache.uima.jcas.tcas.Annotation;
/** /**
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011 * Updated by JCasGen Sat May 07 22:33:38 JST 2011
* XML source: /Users/tommasoteofili/Documents/workspaces/lucene_workspace/lucene_dev/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml * XML source: /Users/koji/Documents/workspace/DummyEntityAnnotator/desc/DummyEntityAEDescriptor.xml
* @generated */ * @generated */
public class EntityAnnotation extends Annotation { public class EntityAnnotation extends Annotation {
/** @generated /** @generated
@ -57,6 +57,42 @@ public class EntityAnnotation extends Annotation {
@generated modifiable */ @generated modifiable */
private void readObject() {} private void readObject() {}
}
//*--------------*
//* Feature: name
/** Getter for the {@code name} feature.
 * If {@code featOkTst} is set and {@code casFeat_name} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @return the value read via {@code ll_getStringValue} for the {@code name} feature code
 * @generated */
public String getName() {
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name);}
/** Setter for the {@code name} feature.
 * If {@code featOkTst} is set and {@code casFeat_name} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param v value written via {@code ll_setStringValue} for the {@code name} feature code
 * @generated */
public void setName(String v) {
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name, v);}
//*--------------*
//* Feature: entity
/** Getter for the {@code entity} feature.
 * If {@code featOkTst} is set and {@code casFeat_entity} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @return the value read via {@code ll_getStringValue} for the {@code entity} feature code
 * @generated */
public String getEntity() {
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity);}
/** Setter for the {@code entity} feature.
 * If {@code featOkTst} is set and {@code casFeat_entity} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param v value written via {@code ll_setStringValue} for the {@code entity} feature code
 * @generated */
public void setEntity(String v) {
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity, v);}
}

View File

@ -1,5 +1,5 @@
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */ /* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
package org.apache.solr.uima.ts; package org.apache.solr.uima.ts;
import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.JCas;
@ -9,10 +9,12 @@ import org.apache.uima.cas.impl.FSGenerator;
import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.impl.TypeImpl; import org.apache.uima.cas.impl.TypeImpl;
import org.apache.uima.cas.Type; import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.FeatureImpl;
import org.apache.uima.cas.Feature;
import org.apache.uima.jcas.tcas.Annotation_Type; import org.apache.uima.jcas.tcas.Annotation_Type;
/** /**
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011 * Updated by JCasGen Sat May 07 22:33:38 JST 2011
* @generated */ * @generated */
public class EntityAnnotation_Type extends Annotation_Type { public class EntityAnnotation_Type extends Annotation_Type {
/** @generated */ /** @generated */
@ -38,6 +40,42 @@ public class EntityAnnotation_Type extends Annotation_Type {
/** @generated /** @generated
@modifiable */ @modifiable */
public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.solr.uima.ts.EntityAnnotation"); public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.solr.uima.ts.EntityAnnotation");
/** @generated */
final Feature casFeat_name;
/** @generated */
final int casFeatCode_name;
/** Reads the {@code name} feature at the given address.
 * If {@code featOkTst} is set and {@code casFeat_name} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param addr address handed unchanged to {@code ll_getStringValue}
 * @return the value read via {@code ll_getStringValue} using {@code casFeatCode_name}
 * @generated */
public String getName(int addr) {
if (featOkTst && casFeat_name == null)
jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
return ll_cas.ll_getStringValue(addr, casFeatCode_name);
}
/** Writes the {@code name} feature at the given address.
 * If {@code featOkTst} is set and {@code casFeat_name} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param addr address handed unchanged to {@code ll_setStringValue}
 * @param v value written via {@code ll_setStringValue} using {@code casFeatCode_name}
 * @generated */
public void setName(int addr, String v) {
if (featOkTst && casFeat_name == null)
jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
ll_cas.ll_setStringValue(addr, casFeatCode_name, v);}
/** @generated */
final Feature casFeat_entity;
/** @generated */
final int casFeatCode_entity;
/** Reads the {@code entity} feature at the given address.
 * If {@code featOkTst} is set and {@code casFeat_entity} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param addr address handed unchanged to {@code ll_getStringValue}
 * @return the value read via {@code ll_getStringValue} using {@code casFeatCode_entity}
 * @generated */
public String getEntity(int addr) {
if (featOkTst && casFeat_entity == null)
jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
return ll_cas.ll_getStringValue(addr, casFeatCode_entity);
}
/** Writes the {@code entity} feature at the given address.
 * If {@code featOkTst} is set and {@code casFeat_entity} is null, the missing
 * feature is first reported through {@code throwFeatMissing}.
 * @param addr address handed unchanged to {@code ll_setStringValue}
 * @param v value written via {@code ll_setStringValue} using {@code casFeatCode_entity}
 * @generated */
public void setEntity(int addr, String v) {
if (featOkTst && casFeat_entity == null)
jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
ll_cas.ll_setStringValue(addr, casFeatCode_entity, v);}
@ -47,6 +85,14 @@ public class EntityAnnotation_Type extends Annotation_Type {
super(jcas, casType); super(jcas, casType);
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
casFeat_name = jcas.getRequiredFeatureDE(casType, "name", "uima.cas.String", featOkTst);
casFeatCode_name = (null == casFeat_name) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_name).getCode();
casFeat_entity = jcas.getRequiredFeatureDE(casType, "entity", "uima.cas.String", featOkTst);
casFeatCode_entity = (null == casFeat_entity) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_entity).getCode();
} }
} }

View File

@ -32,6 +32,18 @@
<name>org.apache.solr.uima.ts.EntityAnnotation</name> <name>org.apache.solr.uima.ts.EntityAnnotation</name>
<description/> <description/>
<supertypeName>uima.tcas.Annotation</supertypeName> <supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
<name>name</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
<featureDescription>
<name>entity</name>
<description/>
<rangeTypeName>uima.cas.String</rangeTypeName>
</featureDescription>
</features>
</typeDescription> </typeDescription>
</types> </types>
</typeSystemDescription> </typeSystemDescription>

View File

@ -597,6 +597,7 @@
stored="true" multiValued="true"/> <dynamicField name="random_*" stored="true" multiValued="true"/> <dynamicField name="random_*"
type="random" /> type="random" />
--> -->
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- <!--
uncomment the following to ignore any fields that don't already uncomment the following to ignore any fields that don't already
match an existing field name or dynamic field, rather than match an existing field name or dynamic field, rather than

View File

@ -995,8 +995,9 @@
<lst name="type"> <lst name="type">
<str name="name">org.apache.solr.uima.ts.EntityAnnotation</str> <str name="name">org.apache.solr.uima.ts.EntityAnnotation</str>
<lst name="mapping"> <lst name="mapping">
<str name="feature">coveredText</str> <str name="feature">entity</str>
<str name="field">entity</str> <str name="fieldNameFeature">name</str>
<str name="dynamicField">*_sm</str>
</lst> </lst>
</lst> </lst>
</lst> </lst>