SOLR-742 -- Add ability to create dynamic fields with custom DataImportHandler transformers

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@708967 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2008-10-29 18:05:26 +00:00
parent 411a2fe607
commit 19c70e9df5
5 changed files with 103 additions and 46 deletions

View File

@ -40,6 +40,9 @@ Bug Fixes
3. SOLR-728: Add synchronization to avoid race condition of multiple imports working concurrently
(Walter Ferrara, shalin)
4. SOLR-742: Add ability to create dynamic fields with custom DataImportHandler transformers
(Wojtek Piaseczny, Noble Paul, shalin)
Documentation
----------------------

View File

@ -129,6 +129,12 @@ public class DataConfig {
public List<Field> implicitFields;
public Map<String, Field> colNameVsField;
public Map<String, Field> lowercaseColNameVsField;
public Entity rootEntity;
public Entity() {
}
@ -141,8 +147,14 @@ public class DataConfig {
allAttributes = getAllAttributes(element);
List<Element> n = getChildNodes(element, "field");
fields = new ArrayList<Field>();
for (Element elem : n)
fields.add(new Field(elem));
colNameVsField = new HashMap<String, Field>();
lowercaseColNameVsField = new HashMap<String, Field>();
for (Element elem : n) {
Field field = new Field(elem);
fields.add(field);
colNameVsField.put(field.column, field);
lowercaseColNameVsField.put(field.column.toLowerCase(), field);
}
n = getChildNodes(element, "entity");
if (!n.isEmpty())
entities = new ArrayList<Entity>();
@ -151,6 +163,14 @@ public class DataConfig {
}
public void setParentEntity(Entity parentEntity) {
this.parentEntity = parentEntity;
while (parentEntity.parentEntity != null) {
parentEntity = parentEntity.parentEntity;
}
this.rootEntity = parentEntity;
}
public void clearCache() {
if (entities != null) {
for (Entity entity : entities)

View File

@ -100,7 +100,7 @@ public class DataImporter {
Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
initEntity(e, fields, false);
e.implicitFields = new ArrayList<DataConfig.Field>();
String errs = verifyWithSchema(fields, e.implicitFields);
String errs = verifyWithSchema(e, fields);
if (e.implicitFields.isEmpty())
e.implicitFields = null;
if (errs != null) {
@ -111,8 +111,7 @@ public class DataImporter {
}
}
private String verifyWithSchema(Map<String, DataConfig.Field> fields,
List<DataConfig.Field> autoFields) {
private String verifyWithSchema(DataConfig.Entity e, Map<String, DataConfig.Field> fields) {
List<String> errors = new ArrayList<String>();
Map<String, SchemaField> schemaFields = schema.getFields();
for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
@ -123,7 +122,10 @@ public class DataImporter {
.info(sf.getName()
+ " is a required field in SolrSchema . But not found in DataConfig");
}
autoFields.add(new DataConfig.Field(sf.getName(), sf.multiValued()));
DataConfig.Field field = new DataConfig.Field(sf.getName(), sf.multiValued());
e.implicitFields.add(field);
e.colNameVsField.put(field.column, field);
e.lowercaseColNameVsField.put(field.column.toLowerCase(), field);
}
}
for (Map.Entry<String, DataConfig.Field> entry : fields.entrySet()) {
@ -132,7 +134,7 @@ public class DataImporter {
try {
fieldType = schema.getDynamicFieldType(fld.name);
} catch (RuntimeException e) {
} catch (RuntimeException ex) {
// Ignore because it may not be a dynamic field
}
@ -233,7 +235,7 @@ public class DataImporter {
if (e.entities == null)
return;
for (DataConfig.Entity e1 : e.entities) {
e1.parentEntity = e;
e1.setParentEntity(e);
initEntity(e1, fields, e.isDocRoot || docRootFound);
}
@ -530,6 +532,9 @@ public class DataImporter {
}
}
IndexSchema getSchema() {
return schema;
}
SolrCore getCore() {
return core;

View File

@ -19,13 +19,15 @@ package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.schema.SchemaField;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
@ -364,56 +366,53 @@ public class DocBuilder {
}
@SuppressWarnings("unchecked")
private void addFields(DataConfig.Entity entity, SolrInputDocument doc,
Map<String, Object> arow) {
DataConfig.Entity parentMost = entity;
while (parentMost.parentEntity != null)
parentMost = parentMost.parentEntity;
for (DataConfig.Field field : entity.fields) {
addFieldValue(field, arow, null, doc);
}
if (parentMost.implicitFields != null) {
Map<String, Object> lowerCaseMap = new HashMap<String, Object>();
for (Map.Entry<String, Object> entry : arow.entrySet())
lowerCaseMap.put(entry.getKey().toLowerCase(), entry.getValue());
for (DataConfig.Field automaticField : parentMost.implicitFields) {
addFieldValue(automaticField, arow, lowerCaseMap, doc);
private void addFields(DataConfig.Entity entity, SolrInputDocument doc, Map<String, Object> arow) {
for (Map.Entry<String, Object> entry : arow.entrySet()) {
String key = entry.getKey();
DataConfig.Field field = entity.colNameVsField.get(key);
if (field == null) {
field = entity.lowercaseColNameVsField.get(key.toLowerCase());
}
if (field == null && entity.rootEntity != null) {
field = entity.rootEntity.colNameVsField.get(key);
}
if (field == null && entity.rootEntity != null) {
field = entity.rootEntity.lowercaseColNameVsField.get(key.toLowerCase());
}
if (field == null) {
// This can be a dynamic field
SchemaField sf = dataImporter.getSchema().getFieldOrNull(key);
if (sf != null) {
addFieldToDoc(entry.getValue(), key, 1.0f, sf.multiValued(), doc);
}
} else {
if (field.toWrite) {
addFieldToDoc(entry.getValue(), key, field.boost, field.multiValued, doc);
}
}
}
}
private void addFieldValue(DataConfig.Field field, Map<String, Object> arow,
Map<String, Object> lowerCaseMap, SolrInputDocument doc) {
if (!field.toWrite)
return;
Object value = arow.get(field.column);
if (value == null) {
if (lowerCaseMap != null) {
value = lowerCaseMap.get(field.column.toLowerCase());
}
if (value == null)
return;
}
private void addFieldToDoc(Object value, String name, float boost, boolean multiValued, SolrInputDocument doc) {
if (value instanceof Collection) {
Collection collection = (Collection) value;
if (field.multiValued) {
if (multiValued) {
for (Object o : collection) {
doc.addField(field.nameOrColName, o, field.boost);
doc.addField(name, o, boost);
}
} else {
if (doc.getField(field.nameOrColName) == null)
if (doc.getField(name) == null)
for (Object o : collection) {
doc.addField(field.nameOrColName, o, field.boost);
doc.addField(name, o,boost);
break;
}
}
} else if (field.multiValued) {
doc.addField(field.nameOrColName, value, field.boost);
} else if (multiValued) {
doc.addField(name, value, boost);
} else {
if (doc.getField(field.nameOrColName) == null)
doc.addField(field.nameOrColName, value, field.boost);
if (doc.getField(name) == null)
doc.addField(name, value, boost);
}
}

View File

@ -68,6 +68,19 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
assertQ(req("id:1"), "//*[@numFound='1']");
}
@Test
@SuppressWarnings("unchecked")
public void testDynamicFields() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(dataConfigWithDynamicTransformer);
assertQ(req("id:1"), "//*[@numFound='1']");
assertQ(req("dynamic_s:test"), "//*[@numFound='1']");
}
@Test
@SuppressWarnings("unchecked")
public void testRequestParamsAsVariable() throws Exception {
@ -100,6 +113,14 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
}
}
public static class AddDynamicFieldTransformer extends Transformer {
public Object transformRow(Map<String, Object> row, Context context) {
// Add a dynamic field
row.put("dynamic_s", "test");
return row;
}
}
public static class MockDataSource2 extends MockDataSource {
}
@ -114,4 +135,13 @@ public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
" </document>\n" +
"</dataConfig>";
private final String dataConfigWithDynamicTransformer = "<dataConfig>\n" +
" <document>\n" +
" <entity name=\"books\" query=\"select * from x\"" +
" transformer=\"TestDocBuilder2$AddDynamicFieldTransformer\">\n" +
" <field column=\"id\" />\n" +
" <field column=\"desc\" />\n" +
" </entity>\n" +
" </document>\n" +
"</dataConfig>";
}