mirror of https://github.com/apache/lucene.git
SOLR-1033 -- Current entity's namespace is made available to all Transformers. This allows one to use an output field of TemplateTransformer in other transformers, among other things.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@747664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
19d7b267a7
commit
c8e94bec4b
|
@ -82,6 +82,10 @@ New Features
|
|||
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
|
||||
(Jon Baer, Noble Paul via shalin)
|
||||
|
||||
19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
|
||||
of TemplateTransformer in other transformers, among other things.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
|
||||
|
|
|
@ -51,9 +51,6 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
|
||||
protected String query;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private Map session;
|
||||
|
||||
protected String onError = ABORT;
|
||||
|
||||
public void init(Context context) {
|
||||
|
@ -67,7 +64,6 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
}
|
||||
resolver = (VariableResolverImpl) context.getVariableResolver();
|
||||
query = null;
|
||||
session = null;
|
||||
isFirstInit = false;
|
||||
|
||||
}
|
||||
|
@ -169,6 +165,7 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
if (rows != null) {
|
||||
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
|
||||
for (Map<String, Object> map : rows) {
|
||||
resolver.addNamespace(entityName, map);
|
||||
Object o = t.transformRow(map, context);
|
||||
if (o == null)
|
||||
continue;
|
||||
|
@ -184,6 +181,7 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
}
|
||||
rows = tmpRows;
|
||||
} else {
|
||||
resolver.addNamespace(entityName, transformedRow);
|
||||
Object o = t.transformRow(transformedRow, context);
|
||||
if (o == null)
|
||||
return null;
|
||||
|
@ -253,19 +251,6 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Stores a key/value pair in this processor's private session map.
 *
 * @param key the attribute key
 * @param val the value to associate with the key
 */
public void setSessionAttribute(Object key, Object val) {
|
||||
// The session map is allocated on first write rather than up front.
if (session == null) {
|
||||
session = new HashMap();
|
||||
}
|
||||
session.put(key, val);
|
||||
}
|
||||
|
||||
/**
 * Looks up a value in this processor's private session map.
 *
 * @param key the attribute key
 * @return the stored value, or null when no session map exists yet or the map has no
 *         value for the key
 */
public Object getSessionAttribute(Object key) {
|
||||
// No map has been created, so nothing can have been stored.
if (session == null)
|
||||
return null;
|
||||
return session.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to
|
||||
* stream rows one-by-one. Return null to signal end of rows
|
||||
|
@ -282,14 +267,6 @@ public class EntityProcessorBase extends EntityProcessor {
|
|||
/*no op*/
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the internal session maintained by this EntityProcessor
|
||||
*/
|
||||
public void clearSession() {
|
||||
// Nothing to clear when no session map has been created; the map object itself is
// kept (only emptied), not reset to null.
if (session != null)
|
||||
session.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
|
|
|
@ -53,26 +53,10 @@ public class TemplateTransformer extends Transformer {
|
|||
@SuppressWarnings("unchecked")
|
||||
public Object transformRow(Map<String, Object> row, Context context) {
|
||||
|
||||
String entityName = context.getEntityAttribute(DataImporter.NAME);
|
||||
|
||||
VariableResolverImpl resolver = (VariableResolverImpl) context
|
||||
.getVariableResolver();
|
||||
Map<String, Object> resolverMap = (Map<String, Object>) resolver
|
||||
.resolve(entityName);
|
||||
|
||||
// Clone resolver map because the resolver map contains common fields or any
|
||||
// others
|
||||
// that the entity processor chooses to keep.
|
||||
Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
|
||||
if (resolverMap != null) {
|
||||
for (Map.Entry<String, Object> entry : resolverMap.entrySet())
|
||||
resolverMapCopy.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
// Add current row to the copy of resolver map
|
||||
for (Map.Entry<String, Object> entry : row.entrySet())
|
||||
resolverMapCopy.put(entry.getKey(), entry.getValue());
|
||||
// Add this copy to the namespace of the current entity in the resolver
|
||||
resolver.addNamespace(entityName, resolverMapCopy);
|
||||
// for (Map.Entry<String, Object> entry : row.entrySet())
|
||||
|
||||
for (Map<String, String> map : context.getAllEntityFields()) {
|
||||
String expr = map.get(TEMPLATE);
|
||||
|
@ -98,8 +82,6 @@ public class TemplateTransformer extends Transformer {
|
|||
row.put(column, resolver.replaceTokens(expr));
|
||||
}
|
||||
|
||||
// Restore the original resolver map
|
||||
resolver.addNamespace(entityName, resolverMap);
|
||||
|
||||
return row;
|
||||
}
|
||||
|
|
|
@ -182,15 +182,12 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
|
||||
r = getNext();
|
||||
if (r == null) {
|
||||
Object hasMore = getSessionAttribute(HAS_MORE);
|
||||
Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
|
||||
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
|
||||
String url = (String) getSessionAttribute(NEXT_URL);
|
||||
String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
|
||||
if (url == null)
|
||||
url = context.getEntityAttribute(URL);
|
||||
Map namespace = (Map) getSessionAttribute(entityName);
|
||||
if (namespace != null)
|
||||
resolver.addNamespace(entityName, namespace);
|
||||
clearSession();
|
||||
addNamespace();
|
||||
initQuery(resolver.replaceTokens(url));
|
||||
r = getNext();
|
||||
if (r == null)
|
||||
|
@ -199,12 +196,41 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
addCommonFields(r);
|
||||
r = applyTransformer(r);
|
||||
if (r != null)
|
||||
return readUsefulVars(r);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Rebuilds the current entity's namespace in the variable resolver from values kept in
 * the entity-scoped session.
 * <p>
 * The names looked up are the union of {@code commonFields} and
 * {@code placeHolderVariables}. Each name whose entity-scoped session attribute is
 * non-null is copied into a fresh map, which is then registered with the resolver under
 * {@code entityName}. When both name collections are null or empty the resolver is left
 * untouched.
 */
private void addNamespace() {
|
||||
Map<String, Object> namespace = new HashMap<String, Object>();
|
||||
// A set is used so a name present in both collections is only looked up once.
Set<String> allNames = new HashSet<String>();
|
||||
if (commonFields != null) allNames.addAll(commonFields);
|
||||
if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
|
||||
// No names to resolve: return without replacing the existing namespace.
if(allNames.isEmpty()) return;
|
||||
|
||||
for (String name : allNames) {
|
||||
Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
|
||||
// Names with no stored value are simply left out of the namespace.
if (val != null) namespace.put(name, val);
|
||||
}
|
||||
resolver.addNamespace(entityName, namespace);
|
||||
|
||||
}
|
||||
|
||||
/**
 * Fills in missing common fields on a row from the entity-scoped session.
 * <p>
 * For every name in {@code commonFields} for which the row has no value (or a null
 * value), the entity-scoped session attribute of the same name is copied into the row if
 * it is non-null. Values already present in the row are never overwritten. Does nothing
 * when {@code commonFields} is null.
 *
 * @param r the row to complete; modified in place
 */
private void addCommonFields(Map<String, Object> r) {
|
||||
if(commonFields != null){
|
||||
for (String commonField : commonFields) {
|
||||
// Only fall back to the session when the row itself did not supply the field.
if(r.get(commonField) == null) {
|
||||
Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
|
||||
if(val != null) r.put(commonField, val);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void initQuery(String s) {
|
||||
Reader data = null;
|
||||
try {
|
||||
|
@ -321,28 +347,21 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
|
||||
Object val = r.get(HAS_MORE);
|
||||
if (val != null)
|
||||
setSessionAttribute(HAS_MORE, val);
|
||||
context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
|
||||
val = r.get(NEXT_URL);
|
||||
if (val != null)
|
||||
setSessionAttribute(NEXT_URL, val);
|
||||
context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
|
||||
if (placeHolderVariables != null) {
|
||||
Map namespace = getNameSpace();
|
||||
for (String s : placeHolderVariables) {
|
||||
val = r.get(s);
|
||||
if (val != null)
|
||||
namespace.put(s, val);
|
||||
context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
|
||||
}
|
||||
}
|
||||
if (commonFields != null) {
|
||||
for (String s : commonFields) {
|
||||
Object commonVal = r.get(s);
|
||||
if (commonVal != null) {
|
||||
setSessionAttribute(s, commonVal);
|
||||
getNameSpace().put(s, commonVal);
|
||||
} else {
|
||||
commonVal = getSessionAttribute(s);
|
||||
if (commonVal != null)
|
||||
r.put(s, commonVal);
|
||||
context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -422,15 +441,6 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Returns the namespace map stored in the processor session under {@code entityName},
 * creating and storing an empty map first if none is present.
 *
 * @return the map stored under {@code entityName}; never null
 */
@SuppressWarnings("unchecked")
|
||||
private Map getNameSpace() {
|
||||
Map namespace = (Map) getSessionAttribute(entityName);
|
||||
// First use for this entity: create the map and store it in the session so later
// calls return the same instance.
if (namespace == null) {
|
||||
namespace = new HashMap();
|
||||
setSessionAttribute(entityName, namespace);
|
||||
}
|
||||
return namespace;
|
||||
}
|
||||
|
||||
public static final String URL = "url";
|
||||
|
||||
|
|
|
@ -25,9 +25,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Test for RegexTransformer
|
||||
* </p>
|
||||
* <p> Test for RegexTransformer </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
|
@ -37,14 +35,14 @@ public class TestRegexTransformer {
|
|||
@Test
|
||||
public void commaSeparated() {
|
||||
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
|
||||
// <field column="col1" sourceColName="a" splitBy="," />
|
||||
fields.add(getField("col1", "string", null, "a", ","));
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, null,
|
||||
null, 0, fields, null);
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null);
|
||||
|
||||
Map<String, Object> src = new HashMap<String, Object>();
|
||||
String s = "a,bb,cc,d";
|
||||
src.put("a", s);
|
||||
Map<String, Object> result = new RegexTransformer().transformRow(src,
|
||||
context);
|
||||
src.put("a", "a,bb,cc,d");
|
||||
|
||||
Map<String, Object> result = new RegexTransformer().transformRow(src, context);
|
||||
Assert.assertEquals(2, result.size());
|
||||
Assert.assertEquals(4, ((List) result.get("col1")).size());
|
||||
}
|
||||
|
@ -52,14 +50,17 @@ public class TestRegexTransformer {
|
|||
@Test
|
||||
public void replaceWith() {
|
||||
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
|
||||
// <field column="name" regexp="'" replaceWith="''" />
|
||||
Map<String, String> fld = getField("name", "string", "'", null, null);
|
||||
fld.put("replaceWith", "''");
|
||||
fields.add(fld);
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, null,
|
||||
null, 0, fields, null);
|
||||
|
||||
Map<String, Object> src = new HashMap<String, Object>();
|
||||
String s = "D'souza";
|
||||
src.put("name", s);
|
||||
|
||||
Map<String, Object> result = new RegexTransformer().transformRow(src,
|
||||
context);
|
||||
Assert.assertEquals("D''souza", result.get("name"));
|
||||
|
@ -67,36 +68,57 @@ public class TestRegexTransformer {
|
|||
|
||||
@Test
|
||||
public void mileage() {
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, null,
|
||||
null, 0, getFields(), null);
|
||||
List<Map<String, String>> fields = getFields();
|
||||
|
||||
Map<String, Object> src = new HashMap<String, Object>();
|
||||
// add another regex which reuses result from previous regex again!
|
||||
// <field column="hltCityMPG" sourceColName="rowdata" regexp=".*(${e.city_mileage})" replaceWith="*** $1 ***" />
|
||||
Map<String, String> fld = getField("hltCityMPG", "string",
|
||||
".*(${e.city_mileage})", "rowdata", null);
|
||||
fld.put("replaceWith", "*** $1 ***");
|
||||
fields.add(fld);
|
||||
|
||||
Map<String, Object> row = new HashMap<String, Object>();
|
||||
String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
|
||||
src.put("rowdata", s);
|
||||
Map<String, Object> result = new RegexTransformer().transformRow(src,
|
||||
context);
|
||||
Assert.assertEquals(3, result.size());
|
||||
row.put("rowdata", s);
|
||||
|
||||
VariableResolverImpl resolver = new VariableResolverImpl();
|
||||
resolver.addNamespace("e", row);
|
||||
Map<String, String> eAttrs = AbstractDataImportHandlerTest.createMap("name", "e");
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs);
|
||||
|
||||
Map<String, Object> result = new RegexTransformer().transformRow(row, context);
|
||||
Assert.assertEquals(4, result.size());
|
||||
Assert.assertEquals(s, result.get("rowdata"));
|
||||
Assert.assertEquals("26", result.get("highway_mileage"));
|
||||
Assert.assertEquals("19", result.get("city_mileage"));
|
||||
|
||||
Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG"));
|
||||
}
|
||||
|
||||
public static List<Map<String, String>> getFields() {
|
||||
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
|
||||
|
||||
// <field column="city_mileage" sourceColName="rowdata" regexp=
|
||||
// "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City"
|
||||
fields.add(getField("city_mileage", "sint",
|
||||
"Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
|
||||
"rowdata", null));
|
||||
|
||||
// <field column="highway_mileage" sourceColName="rowdata" regexp=
|
||||
// "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City"
|
||||
fields.add(getField("highway_mileage", "sint",
|
||||
"Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
|
||||
"rowdata", null));
|
||||
|
||||
// <field column="seating_capacity" sourceColName="rowdata" regexp="Seating capacity:(.*)"
|
||||
fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
|
||||
"rowdata", null));
|
||||
fields
|
||||
.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
|
||||
|
||||
// <field column="warranty" sourceColName="rowdata" regexp="Warranty:(.*)" />
|
||||
fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
|
||||
|
||||
// <field column="rowdata" sourceColName="rowdata" />
|
||||
fields.add(getField("rowdata", "string", null, "rowdata", null));
|
||||
return fields;
|
||||
|
||||
}
|
||||
|
||||
public static Map<String, String> getField(String col, String type,
|
||||
|
|
|
@ -43,11 +43,17 @@ public class TestTemplateTransformer {
|
|||
fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
|
||||
TemplateTransformer.TEMPLATE,
|
||||
"${e.lastName}, ${e.firstName} ${e.middleName}"));
|
||||
// test reuse of template output in another template
|
||||
fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname",
|
||||
TemplateTransformer.TEMPLATE,"Mr ${e.name}"));
|
||||
|
||||
Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
|
||||
"middleName", "Shekhar", "lastName", "Mangar");
|
||||
Map row = AbstractDataImportHandlerTest.createMap(
|
||||
"firstName", "Shalin",
|
||||
"middleName", "Shekhar",
|
||||
"lastName", "Mangar");
|
||||
|
||||
VariableResolverImpl resolver = new VariableResolverImpl();
|
||||
resolver.addNamespace("e", row);
|
||||
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
|
||||
"name", "e");
|
||||
|
||||
|
@ -55,6 +61,7 @@ public class TestTemplateTransformer {
|
|||
null, 0, fields, entityAttrs);
|
||||
new TemplateTransformer().transformRow(row, context);
|
||||
Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
|
||||
Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue