mirror of https://github.com/apache/lucene.git
SOLR-2503: feature value map to dynamicField
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1101047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1d842018fc
commit
b30e4e4ec8
|
@ -28,6 +28,11 @@ Upgrading from Solr 3.1
|
|||
It should be moved to the UIMAUpdateRequestProcessorFactory settings.
|
||||
See contrib/uima/README.txt for more details. (SOLR-2436)
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-2503: Extend the mapping function so that a feature value can be mapped to a dynamicField. (koji)
|
||||
|
||||
Test Cases:
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -30,14 +30,14 @@ public class SolrUIMAConfiguration {
|
|||
|
||||
private boolean fieldsMerging;
|
||||
|
||||
private Map<String, Map<String, String>> typesFeaturesFieldsMapping;
|
||||
private Map<String, Map<String, MapField>> typesFeaturesFieldsMapping;
|
||||
|
||||
private String aePath;
|
||||
|
||||
private Map<String, Object> runtimeParameters;
|
||||
|
||||
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
|
||||
Map<String, Map<String, String>> typesFeaturesFieldsMapping,
|
||||
Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
|
||||
Map<String, Object> runtimeParameters) {
|
||||
this.aePath = aePath;
|
||||
this.fieldsToAnalyze = fieldsToAnalyze;
|
||||
|
@ -54,7 +54,7 @@ public class SolrUIMAConfiguration {
|
|||
return fieldsMerging;
|
||||
}
|
||||
|
||||
public Map<String, Map<String, String>> getTypesFeaturesFieldsMapping() {
|
||||
public Map<String, Map<String, MapField>> getTypesFeaturesFieldsMapping() {
|
||||
return typesFeaturesFieldsMapping;
|
||||
}
|
||||
|
||||
|
@ -65,4 +65,39 @@ public class SolrUIMAConfiguration {
|
|||
/**
 * Returns the runtime parameters held by this configuration.
 *
 * @return the {@code runtimeParameters} map stored in this object (the same
 *         reference, not a copy)
 */
public Map<String, Object> getRuntimeParameters() {
|
||||
return runtimeParameters;
|
||||
}
|
||||
|
||||
static final class MapField {
|
||||
|
||||
private String fieldName, fieldNameFeature;
|
||||
private boolean prefix; // valid if dynamicField == true
|
||||
// false: *_s, true: s_*
|
||||
|
||||
MapField(String fieldName, String fieldNameFeature){
|
||||
this.fieldName = fieldName;
|
||||
this.fieldNameFeature = fieldNameFeature;
|
||||
if(fieldNameFeature != null){
|
||||
if(fieldName.startsWith("*")){
|
||||
prefix = false;
|
||||
this.fieldName = fieldName.substring(1);
|
||||
}
|
||||
else if(fieldName.endsWith("*")){
|
||||
prefix = true;
|
||||
this.fieldName = fieldName.substring(0, fieldName.length() - 1);
|
||||
}
|
||||
else
|
||||
throw new RuntimeException("static field name cannot be used for dynamicField");
|
||||
}
|
||||
}
|
||||
|
||||
String getFieldNameFeature(){
|
||||
return fieldNameFeature;
|
||||
}
|
||||
|
||||
String getFieldName(String featureValue){
|
||||
if(fieldNameFeature != null){
|
||||
return prefix ? fieldName + featureValue : featureValue + fieldName;
|
||||
}
|
||||
return fieldName;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
|
||||
/**
|
||||
* Read configuration for Solr-UIMA integration
|
||||
|
@ -62,8 +63,8 @@ public class SolrUIMAConfigurationReader {
|
|||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
private Map<String, Map<String, String>> readTypesFeaturesFieldsMapping() {
|
||||
Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
|
||||
private Map<String, Map<String, MapField>> readTypesFeaturesFieldsMapping() {
|
||||
Map<String, Map<String, MapField>> map = new HashMap<String, Map<String, MapField>>();
|
||||
|
||||
NamedList fieldMappings = (NamedList) args.get("fieldMappings");
|
||||
/* iterate over UIMA types */
|
||||
|
@ -71,13 +72,21 @@ public class SolrUIMAConfigurationReader {
|
|||
NamedList type = (NamedList) fieldMappings.get("type", i);
|
||||
String typeName = (String)type.get("name");
|
||||
|
||||
Map<String, String> subMap = new HashMap<String, String>();
|
||||
Map<String, MapField> subMap = new HashMap<String, MapField>();
|
||||
/* iterate over mapping definitions */
|
||||
for(int j = 0; j < type.size() - 1; j++){
|
||||
NamedList mapping = (NamedList) type.get("mapping", j + 1);
|
||||
String featureName = (String) mapping.get("feature");
|
||||
String fieldNameFeature = null;
|
||||
String mappedFieldName = (String) mapping.get("field");
|
||||
subMap.put(featureName, mappedFieldName);
|
||||
if(mappedFieldName == null){
|
||||
fieldNameFeature = (String) mapping.get("fieldNameFeature");
|
||||
mappedFieldName = (String) mapping.get("dynamicField");
|
||||
}
|
||||
if(mappedFieldName == null)
|
||||
throw new RuntimeException("either of field or dynamicField should be defined for feature " + featureName);
|
||||
MapField mapField = new MapField(mappedFieldName, fieldNameFeature);
|
||||
subMap.put(featureName, mapField);
|
||||
}
|
||||
map.put(typeName, subMap);
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.solr.uima.processor;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.uima.cas.FSIterator;
|
||||
import org.apache.uima.cas.FeatureStructure;
|
||||
import org.apache.uima.cas.Type;
|
||||
|
@ -53,7 +54,7 @@ public class UIMAToSolrMapper {
|
|||
* name of UIMA type to map
|
||||
* @param featureFieldsmapping
|
||||
*/
|
||||
public void map(String typeName, Map<String, String> featureFieldsmapping) {
|
||||
public void map(String typeName, Map<String, MapField> featureFieldsmapping) {
|
||||
try {
|
||||
FeatureStructure fsMock = (FeatureStructure) Class.forName(typeName).getConstructor(
|
||||
JCas.class).newInstance(cas);
|
||||
|
@ -62,7 +63,11 @@ public class UIMAToSolrMapper {
|
|||
.hasNext();) {
|
||||
FeatureStructure fs = iterator.next();
|
||||
for (String featureName : featureFieldsmapping.keySet()) {
|
||||
String fieldName = featureFieldsmapping.get(featureName);
|
||||
MapField mapField = featureFieldsmapping.get(featureName);
|
||||
String fieldNameFeature = mapField.getFieldNameFeature();
|
||||
String fieldNameFeatureValue = fieldNameFeature == null ? null :
|
||||
fs.getFeatureValueAsString(type.getFeatureByBaseName(fieldNameFeature));
|
||||
String fieldName = mapField.getFieldName(fieldNameFeatureValue);
|
||||
log.info(new StringBuffer("mapping ").append(typeName).append("@").append(featureName)
|
||||
.append(" to ").append(fieldName).toString());
|
||||
String featureValue = null;
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.solr.uima.processor.ae.AEProvider;
|
||||
import org.apache.solr.uima.processor.ae.AEProviderFactory;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
|
@ -69,7 +70,7 @@ public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
|
|||
|
||||
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
|
||||
/* get field mapping from config */
|
||||
Map<String, Map<String, String>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
|
||||
.getTypesFeaturesFieldsMapping();
|
||||
/* map type features on fields */
|
||||
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.solr.core.SolrCore;
|
|||
import org.apache.solr.handler.XmlUpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||
import org.junit.Before;
|
||||
|
@ -82,11 +83,11 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
UpdateRequestProcessor processor = factory.getInstance(req(), null, null);
|
||||
assertTrue(processor instanceof UIMAUpdateRequestProcessor);
|
||||
SolrUIMAConfiguration conf = ((UIMAUpdateRequestProcessor)processor).solrUIMAConfiguration;
|
||||
Map<String, Map<String, String>> map = conf.getTypesFeaturesFieldsMapping();
|
||||
Map<String, String> subMap = map.get("a-type-which-can-have-multiple-features");
|
||||
Map<String, Map<String, MapField>> map = conf.getTypesFeaturesFieldsMapping();
|
||||
Map<String, MapField> subMap = map.get("a-type-which-can-have-multiple-features");
|
||||
assertEquals(2, subMap.size());
|
||||
assertEquals("1", subMap.get("A"));
|
||||
assertEquals("2", subMap.get("B"));
|
||||
assertEquals("1", subMap.get("A").getFieldName(null));
|
||||
assertEquals("2", subMap.get("B").getFieldName(null));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -104,7 +105,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
assertU(commit());
|
||||
assertQ(req("sentence:*"), "//*[@numFound='1']");
|
||||
assertQ(req("sentiment:*"), "//*[@numFound='0']");
|
||||
assertQ(req("entity:Prague"), "//*[@numFound='1']");
|
||||
assertQ(req("OTHER_sm:Prague"), "//*[@numFound='1']");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -124,7 +125,7 @@ public class UIMAUpdateRequestProcessorTest extends SolrTestCaseJ4 {
|
|||
assertQ(req("sentence:*"), "//*[@numFound='2']");
|
||||
|
||||
assertQ(req("sentiment:positive"), "//*[@numFound='1']");
|
||||
assertQ(req("entity:Apache"), "//*[@numFound='2']");
|
||||
assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
|
|
|
@ -34,6 +34,12 @@ public class DummyEntityAnnotator extends JCasAnnotator_ImplBase{
|
|||
EntityAnnotation entityAnnotation = new EntityAnnotation(jcas);
|
||||
entityAnnotation.setBegin(annotation.getBegin());
|
||||
entityAnnotation.setEnd(annotation.getEnd());
|
||||
String entityString = annotation.getCoveredText();
|
||||
entityAnnotation.setEntity(entityString);
|
||||
String name = "OTHER"; // "OTHER" makes no sense. In practice, "PERSON", "COUNTRY", "E-MAIL", etc.
|
||||
if(entityString.equals("Apache"))
|
||||
name = "ORGANIZATION";
|
||||
entityAnnotation.setName(name);
|
||||
entityAnnotation.addToIndexes();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
|
||||
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
|
||||
/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
|
||||
package org.apache.solr.uima.ts;
|
||||
|
||||
import org.apache.uima.jcas.JCas;
|
||||
|
@ -11,8 +11,8 @@ import org.apache.uima.jcas.tcas.Annotation;
|
|||
|
||||
|
||||
/**
|
||||
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
|
||||
* XML source: /Users/tommasoteofili/Documents/workspaces/lucene_workspace/lucene_dev/solr/contrib/uima/src/test/resources/DummyEntityAEDescriptor.xml
|
||||
* Updated by JCasGen Sat May 07 22:33:38 JST 2011
|
||||
* XML source: /Users/koji/Documents/workspace/DummyEntityAnnotator/desc/DummyEntityAEDescriptor.xml
|
||||
* @generated */
|
||||
public class EntityAnnotation extends Annotation {
|
||||
/** @generated
|
||||
|
@ -57,6 +57,42 @@ public class EntityAnnotation extends Annotation {
|
|||
@generated modifiable */
|
||||
private void readObject() {}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//*--------------*
|
||||
//* Feature: name
|
||||
|
||||
/** getter for name - gets
|
||||
* @generated */
|
||||
public String getName() {
|
||||
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
|
||||
jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name);}
|
||||
|
||||
/** setter for name - sets
|
||||
* @generated */
|
||||
public void setName(String v) {
|
||||
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_name == null)
|
||||
jcasType.jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_name, v);}
|
||||
|
||||
|
||||
//*--------------*
|
||||
//* Feature: entity
|
||||
|
||||
/** getter for entity - gets
|
||||
* @generated */
|
||||
public String getEntity() {
|
||||
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
|
||||
jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
return jcasType.ll_cas.ll_getStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity);}
|
||||
|
||||
/** setter for entity - sets
|
||||
* @generated */
|
||||
public void setEntity(String v) {
|
||||
if (EntityAnnotation_Type.featOkTst && ((EntityAnnotation_Type)jcasType).casFeat_entity == null)
|
||||
jcasType.jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
jcasType.ll_cas.ll_setStringValue(addr, ((EntityAnnotation_Type)jcasType).casFeatCode_entity, v);}
|
||||
}
|
||||
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
/* First created by JCasGen Fri Mar 04 12:48:08 CET 2011 */
|
||||
/* First created by JCasGen Sat May 07 22:33:38 JST 2011 */
|
||||
package org.apache.solr.uima.ts;
|
||||
|
||||
import org.apache.uima.jcas.JCas;
|
||||
|
@ -9,10 +9,12 @@ import org.apache.uima.cas.impl.FSGenerator;
|
|||
import org.apache.uima.cas.FeatureStructure;
|
||||
import org.apache.uima.cas.impl.TypeImpl;
|
||||
import org.apache.uima.cas.Type;
|
||||
import org.apache.uima.cas.impl.FeatureImpl;
|
||||
import org.apache.uima.cas.Feature;
|
||||
import org.apache.uima.jcas.tcas.Annotation_Type;
|
||||
|
||||
/**
|
||||
* Updated by JCasGen Fri Mar 04 12:50:14 CET 2011
|
||||
* Updated by JCasGen Sat May 07 22:33:38 JST 2011
|
||||
* @generated */
|
||||
public class EntityAnnotation_Type extends Annotation_Type {
|
||||
/** @generated */
|
||||
|
@ -38,6 +40,42 @@ public class EntityAnnotation_Type extends Annotation_Type {
|
|||
/** @generated
|
||||
@modifiable */
|
||||
public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.solr.uima.ts.EntityAnnotation");
|
||||
|
||||
/** @generated */
|
||||
final Feature casFeat_name;
|
||||
/** @generated */
|
||||
final int casFeatCode_name;
|
||||
/** @generated */
|
||||
public String getName(int addr) {
|
||||
if (featOkTst && casFeat_name == null)
|
||||
jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
return ll_cas.ll_getStringValue(addr, casFeatCode_name);
|
||||
}
|
||||
/** @generated */
|
||||
public void setName(int addr, String v) {
|
||||
if (featOkTst && casFeat_name == null)
|
||||
jcas.throwFeatMissing("name", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
ll_cas.ll_setStringValue(addr, casFeatCode_name, v);}
|
||||
|
||||
|
||||
|
||||
/** @generated */
|
||||
final Feature casFeat_entity;
|
||||
/** @generated */
|
||||
final int casFeatCode_entity;
|
||||
/** @generated */
|
||||
public String getEntity(int addr) {
|
||||
if (featOkTst && casFeat_entity == null)
|
||||
jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
return ll_cas.ll_getStringValue(addr, casFeatCode_entity);
|
||||
}
|
||||
/** @generated */
|
||||
public void setEntity(int addr, String v) {
|
||||
if (featOkTst && casFeat_entity == null)
|
||||
jcas.throwFeatMissing("entity", "org.apache.solr.uima.ts.EntityAnnotation");
|
||||
ll_cas.ll_setStringValue(addr, casFeatCode_entity, v);}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -47,6 +85,14 @@ public class EntityAnnotation_Type extends Annotation_Type {
|
|||
super(jcas, casType);
|
||||
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
|
||||
|
||||
|
||||
casFeat_name = jcas.getRequiredFeatureDE(casType, "name", "uima.cas.String", featOkTst);
|
||||
casFeatCode_name = (null == casFeat_name) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_name).getCode();
|
||||
|
||||
|
||||
casFeat_entity = jcas.getRequiredFeatureDE(casType, "entity", "uima.cas.String", featOkTst);
|
||||
casFeatCode_entity = (null == casFeat_entity) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_entity).getCode();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,18 @@
|
|||
<name>org.apache.solr.uima.ts.EntityAnnotation</name>
|
||||
<description/>
|
||||
<supertypeName>uima.tcas.Annotation</supertypeName>
|
||||
<features>
|
||||
<featureDescription>
|
||||
<name>name</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
<featureDescription>
|
||||
<name>entity</name>
|
||||
<description/>
|
||||
<rangeTypeName>uima.cas.String</rangeTypeName>
|
||||
</featureDescription>
|
||||
</features>
|
||||
</typeDescription>
|
||||
</types>
|
||||
</typeSystemDescription>
|
||||
|
|
|
@ -597,6 +597,7 @@
|
|||
stored="true" multiValued="true"/> <dynamicField name="random_*"
|
||||
type="random" />
|
||||
-->
|
||||
<dynamicField name="*_sm" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<!--
|
||||
uncomment the following to ignore any fields that don't already
|
||||
match an existing field name or dynamic field, rather than
|
||||
|
|
|
@ -995,8 +995,9 @@
|
|||
<lst name="type">
|
||||
<str name="name">org.apache.solr.uima.ts.EntityAnnotation</str>
|
||||
<lst name="mapping">
|
||||
<str name="feature">coveredText</str>
|
||||
<str name="field">entity</str>
|
||||
<str name="feature">entity</str>
|
||||
<str name="fieldNameFeature">name</str>
|
||||
<str name="dynamicField">*_sm</str>
|
||||
</lst>
|
||||
</lst>
|
||||
</lst>
|
||||
|
|
Loading…
Reference in New Issue