SOLR-1033 -- Current entity's namespace is made available to all Transformers. This allows one to use an output field of TemplateTransformer in other transformers, among other things.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@747664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2009-02-25 05:27:31 +00:00
parent 19d7b267a7
commit c8e94bec4b
6 changed files with 99 additions and 97 deletions

View File

@ -82,6 +82,10 @@ New Features
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity. 18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
(Jon Baer, Noble Paul via shalin) (Jon Baer, Noble Paul via shalin)
19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
of TemplateTransformer in other transformers, among other things.
(Fergus McMenemie, Noble Paul via shalin)
Optimizations Optimizations
---------------------- ----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used 1. SOLR-846: Reduce memory consumption during delta import by removing keys when used

View File

@ -51,9 +51,6 @@ public class EntityProcessorBase extends EntityProcessor {
protected String query; protected String query;
@SuppressWarnings("unchecked")
private Map session;
protected String onError = ABORT; protected String onError = ABORT;
public void init(Context context) { public void init(Context context) {
@ -67,7 +64,6 @@ public class EntityProcessorBase extends EntityProcessor {
} }
resolver = (VariableResolverImpl) context.getVariableResolver(); resolver = (VariableResolverImpl) context.getVariableResolver();
query = null; query = null;
session = null;
isFirstInit = false; isFirstInit = false;
} }
@ -169,6 +165,7 @@ public class EntityProcessorBase extends EntityProcessor {
if (rows != null) { if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>(); List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
for (Map<String, Object> map : rows) { for (Map<String, Object> map : rows) {
resolver.addNamespace(entityName, map);
Object o = t.transformRow(map, context); Object o = t.transformRow(map, context);
if (o == null) if (o == null)
continue; continue;
@ -184,6 +181,7 @@ public class EntityProcessorBase extends EntityProcessor {
} }
rows = tmpRows; rows = tmpRows;
} else { } else {
resolver.addNamespace(entityName, transformedRow);
Object o = t.transformRow(transformedRow, context); Object o = t.transformRow(transformedRow, context);
if (o == null) if (o == null)
return null; return null;
@ -253,19 +251,6 @@ public class EntityProcessorBase extends EntityProcessor {
return null; return null;
} }
public void setSessionAttribute(Object key, Object val) {
if (session == null) {
session = new HashMap();
}
session.put(key, val);
}
public Object getSessionAttribute(Object key) {
if (session == null)
return null;
return session.get(key);
}
/** /**
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to * For a simple implementation, this is the only method that the sub-class should implement. This is intended to
* stream rows one-by-one. Return null to signal end of rows * stream rows one-by-one. Return null to signal end of rows
@ -282,14 +267,6 @@ public class EntityProcessorBase extends EntityProcessor {
/*no op*/ /*no op*/
} }
/**
* Clears the internal session maintained by this EntityProcessor
*/
public void clearSession() {
if (session != null)
session.clear();
}
/** /**
* Only used by cache implementations * Only used by cache implementations
*/ */

View File

@ -53,26 +53,10 @@ public class TemplateTransformer extends Transformer {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) { public Object transformRow(Map<String, Object> row, Context context) {
String entityName = context.getEntityAttribute(DataImporter.NAME);
VariableResolverImpl resolver = (VariableResolverImpl) context VariableResolverImpl resolver = (VariableResolverImpl) context
.getVariableResolver(); .getVariableResolver();
Map<String, Object> resolverMap = (Map<String, Object>) resolver
.resolve(entityName);
// Clone resolver map because the resolver map contains common fields or any
// others
// that the entity processor chooses to keep.
Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
if (resolverMap != null) {
for (Map.Entry<String, Object> entry : resolverMap.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
}
// Add current row to the copy of resolver map // Add current row to the copy of resolver map
for (Map.Entry<String, Object> entry : row.entrySet()) // for (Map.Entry<String, Object> entry : row.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
// Add this copy to the namespace of the current entity in the resolver
resolver.addNamespace(entityName, resolverMapCopy);
for (Map<String, String> map : context.getAllEntityFields()) { for (Map<String, String> map : context.getAllEntityFields()) {
String expr = map.get(TEMPLATE); String expr = map.get(TEMPLATE);
@ -98,8 +82,6 @@ public class TemplateTransformer extends Transformer {
row.put(column, resolver.replaceTokens(expr)); row.put(column, resolver.replaceTokens(expr));
} }
// Restore the original resolver map
resolver.addNamespace(entityName, resolverMap);
return row; return row;
} }

View File

@ -182,15 +182,12 @@ public class XPathEntityProcessor extends EntityProcessorBase {
initQuery(resolver.replaceTokens(context.getEntityAttribute(URL))); initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
r = getNext(); r = getNext();
if (r == null) { if (r == null) {
Object hasMore = getSessionAttribute(HAS_MORE); Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) { if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
String url = (String) getSessionAttribute(NEXT_URL); String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
if (url == null) if (url == null)
url = context.getEntityAttribute(URL); url = context.getEntityAttribute(URL);
Map namespace = (Map) getSessionAttribute(entityName); addNamespace();
if (namespace != null)
resolver.addNamespace(entityName, namespace);
clearSession();
initQuery(resolver.replaceTokens(url)); initQuery(resolver.replaceTokens(url));
r = getNext(); r = getNext();
if (r == null) if (r == null)
@ -199,12 +196,41 @@ public class XPathEntityProcessor extends EntityProcessorBase {
return null; return null;
} }
} }
addCommonFields(r);
r = applyTransformer(r); r = applyTransformer(r);
if (r != null) if (r != null)
return readUsefulVars(r); return readUsefulVars(r);
} }
} }
/**
 * Rebuilds the current entity's namespace in the variable resolver from
 * values held in the entity-scoped session.
 *
 * The names looked up are the union of {@code commonFields} and
 * {@code placeHolderVariables}. When that union is empty nothing is
 * registered; otherwise a fresh map holding every non-null session value
 * is installed under {@code entityName}.
 */
private void addNamespace() {
  // Collect the distinct names first so each one is looked up only once.
  Set<String> fieldNames = new HashSet<String>();
  if (commonFields != null) {
    fieldNames.addAll(commonFields);
  }
  if (placeHolderVariables != null) {
    fieldNames.addAll(placeHolderVariables);
  }

  if (!fieldNames.isEmpty()) {
    Map<String, Object> entityNamespace = new HashMap<String, Object>();
    for (String fieldName : fieldNames) {
      Object stored = context.getSessionAttribute(fieldName, Context.SCOPE_ENTITY);
      if (stored == null) {
        continue;
      }
      entityNamespace.put(fieldName, stored);
    }
    // Registered even when no value was found, as long as names were configured.
    resolver.addNamespace(entityName, entityNamespace);
  }
}
/**
 * Fills in missing common fields of a row from the entity-scoped session.
 *
 * For every name in {@code commonFields} that has no value (or a null
 * value) in the row, the session attribute of the same name is copied in
 * if it is non-null. Values already present in the row are left untouched.
 *
 * @param r the row to complete; modified in place
 */
private void addCommonFields(Map<String, Object> r) {
  if (commonFields == null) {
    return;
  }
  for (String fieldName : commonFields) {
    if (r.get(fieldName) != null) {
      // The row carries its own value; do not override it.
      continue;
    }
    Object remembered = context.getSessionAttribute(fieldName, Context.SCOPE_ENTITY);
    if (remembered != null) {
      r.put(fieldName, remembered);
    }
  }
}
private void initQuery(String s) { private void initQuery(String s) {
Reader data = null; Reader data = null;
try { try {
@ -251,8 +277,8 @@ public class XPathEntityProcessor extends EntityProcessorBase {
} }
}); });
} catch (Exception e) { } catch (Exception e) {
String msg = "Parsing failed for xml, url:" + s + "rows processed :" + rows.size(); String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
if (rows.size() > 0) msg += "last row : " + rows.get(rows.size() - 1); if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
if (ABORT.equals(onError)) { if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e, msg); wrapAndThrow(SEVERE, e, msg);
} else if (SKIP.equals(onError)) { } else if (SKIP.equals(onError)) {
@ -321,28 +347,21 @@ public class XPathEntityProcessor extends EntityProcessorBase {
private Map<String, Object> readUsefulVars(Map<String, Object> r) { private Map<String, Object> readUsefulVars(Map<String, Object> r) {
Object val = r.get(HAS_MORE); Object val = r.get(HAS_MORE);
if (val != null) if (val != null)
setSessionAttribute(HAS_MORE, val); context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
val = r.get(NEXT_URL); val = r.get(NEXT_URL);
if (val != null) if (val != null)
setSessionAttribute(NEXT_URL, val); context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
if (placeHolderVariables != null) { if (placeHolderVariables != null) {
Map namespace = getNameSpace();
for (String s : placeHolderVariables) { for (String s : placeHolderVariables) {
val = r.get(s); val = r.get(s);
if (val != null) context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
namespace.put(s, val);
} }
} }
if (commonFields != null) { if (commonFields != null) {
for (String s : commonFields) { for (String s : commonFields) {
Object commonVal = r.get(s); Object commonVal = r.get(s);
if (commonVal != null) { if (commonVal != null) {
setSessionAttribute(s, commonVal); context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
getNameSpace().put(s, commonVal);
} else {
commonVal = getSessionAttribute(s);
if (commonVal != null)
r.put(s, commonVal);
} }
} }
} }
@ -395,8 +414,8 @@ public class XPathEntityProcessor extends EntityProcessorBase {
if (row == null || row == Collections.EMPTY_MAP) { if (row == null || row == Collections.EMPTY_MAP) {
isEnd.set(true); isEnd.set(true);
if (exp.get() != null) { if (exp.get() != null) {
String msg = "Parsing failed for xml, url:" + s + "rows processed in this xml:" + count; String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
if (lastRow != null) msg += "last row in this xml: " + lastRow; if (lastRow != null) msg += " last row in this xml:" + lastRow;
if (ABORT.equals(onError)) { if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, exp.get(), msg); wrapAndThrow(SEVERE, exp.get(), msg);
} else if (SKIP.equals(onError)) { } else if (SKIP.equals(onError)) {
@ -422,15 +441,6 @@ public class XPathEntityProcessor extends EntityProcessorBase {
} }
@SuppressWarnings("unchecked")
private Map getNameSpace() {
Map namespace = (Map) getSessionAttribute(entityName);
if (namespace == null) {
namespace = new HashMap();
setSessionAttribute(entityName, namespace);
}
return namespace;
}
public static final String URL = "url"; public static final String URL = "url";

View File

@ -25,9 +25,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* <p> * <p> Test for RegexTransformer </p>
* Test for RegexTransformer
* </p>
* *
* @version $Id$ * @version $Id$
* @since solr 1.3 * @since solr 1.3
@ -37,14 +35,14 @@ public class TestRegexTransformer {
@Test @Test
public void commaSeparated() { public void commaSeparated() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>(); List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="col1" sourceColName="a" splitBy="," />
fields.add(getField("col1", "string", null, "a", ",")); fields.add(getField("col1", "string", null, "a", ","));
Context context = AbstractDataImportHandlerTest.getContext(null, null, Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null);
null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>(); Map<String, Object> src = new HashMap<String, Object>();
String s = "a,bb,cc,d"; src.put("a", "a,bb,cc,d");
src.put("a", s);
Map<String, Object> result = new RegexTransformer().transformRow(src, Map<String, Object> result = new RegexTransformer().transformRow(src, context);
context);
Assert.assertEquals(2, result.size()); Assert.assertEquals(2, result.size());
Assert.assertEquals(4, ((List) result.get("col1")).size()); Assert.assertEquals(4, ((List) result.get("col1")).size());
} }
@ -52,14 +50,17 @@ public class TestRegexTransformer {
@Test @Test
public void replaceWith() { public void replaceWith() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>(); List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="name" regexp="'" replaceWith="''" />
Map<String, String> fld = getField("name", "string", "'", null, null); Map<String, String> fld = getField("name", "string", "'", null, null);
fld.put("replaceWith", "''"); fld.put("replaceWith", "''");
fields.add(fld); fields.add(fld);
Context context = AbstractDataImportHandlerTest.getContext(null, null, Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null); null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>(); Map<String, Object> src = new HashMap<String, Object>();
String s = "D'souza"; String s = "D'souza";
src.put("name", s); src.put("name", s);
Map<String, Object> result = new RegexTransformer().transformRow(src, Map<String, Object> result = new RegexTransformer().transformRow(src,
context); context);
Assert.assertEquals("D''souza", result.get("name")); Assert.assertEquals("D''souza", result.get("name"));
@ -67,36 +68,57 @@ public class TestRegexTransformer {
@Test @Test
public void mileage() { public void mileage() {
Context context = AbstractDataImportHandlerTest.getContext(null, null, List<Map<String, String>> fields = getFields();
null, 0, getFields(), null);
Map<String, Object> src = new HashMap<String, Object>(); // add another regex which reuses result from previous regex again!
// <field column="hltCityMPG" sourceColName="rowdata" regexp=".*(${e.city_mileage})" replaceWith="*** $1 ***" />
Map<String, String> fld = getField("hltCityMPG", "string",
".*(${e.city_mileage})", "rowdata", null);
fld.put("replaceWith", "*** $1 ***");
fields.add(fld);
Map<String, Object> row = new HashMap<String, Object>();
String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City"; String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
src.put("rowdata", s); row.put("rowdata", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context); VariableResolverImpl resolver = new VariableResolverImpl();
Assert.assertEquals(3, result.size()); resolver.addNamespace("e", row);
Map<String, String> eAttrs = AbstractDataImportHandlerTest.createMap("name", "e");
Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs);
Map<String, Object> result = new RegexTransformer().transformRow(row, context);
Assert.assertEquals(4, result.size());
Assert.assertEquals(s, result.get("rowdata")); Assert.assertEquals(s, result.get("rowdata"));
Assert.assertEquals("26", result.get("highway_mileage")); Assert.assertEquals("26", result.get("highway_mileage"));
Assert.assertEquals("19", result.get("city_mileage")); Assert.assertEquals("19", result.get("city_mileage"));
Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG"));
} }
public static List<Map<String, String>> getFields() { public static List<Map<String, String>> getFields() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>(); List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="city_mileage" sourceColName="rowdata" regexp=
// "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City"
fields.add(getField("city_mileage", "sint", fields.add(getField("city_mileage", "sint",
"Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City", "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
"rowdata", null)); "rowdata", null));
// <field column="highway_mileage" sourceColName="rowdata" regexp=
// "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City"
fields.add(getField("highway_mileage", "sint", fields.add(getField("highway_mileage", "sint",
"Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City", "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
"rowdata", null)); "rowdata", null));
// <field column="seating_capacity" sourceColName="rowdata" regexp="Seating capacity:(.*)" />
fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)", fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
"rowdata", null)); "rowdata", null));
fields
.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null)); // <field column="warranty" sourceColName="rowdata" regexp="Warranty:(.*)" />
fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
// <field column="rowdata" sourceColName="rowdata" />
fields.add(getField("rowdata", "string", null, "rowdata", null)); fields.add(getField("rowdata", "string", null, "rowdata", null));
return fields; return fields;
} }
public static Map<String, String> getField(String col, String type, public static Map<String, String> getField(String col, String type,

View File

@ -43,11 +43,17 @@ public class TestTemplateTransformer {
fields.add(AbstractDataImportHandlerTest.createMap("column", "name", fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
TemplateTransformer.TEMPLATE, TemplateTransformer.TEMPLATE,
"${e.lastName}, ${e.firstName} ${e.middleName}")); "${e.lastName}, ${e.firstName} ${e.middleName}"));
// test reuse of template output in another template
fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname",
TemplateTransformer.TEMPLATE,"Mr ${e.name}"));
Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin", Map row = AbstractDataImportHandlerTest.createMap(
"middleName", "Shekhar", "lastName", "Mangar"); "firstName", "Shalin",
"middleName", "Shekhar",
"lastName", "Mangar");
VariableResolverImpl resolver = new VariableResolverImpl(); VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap( Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"name", "e"); "name", "e");
@ -55,6 +61,7 @@ public class TestTemplateTransformer {
null, 0, fields, entityAttrs); null, 0, fields, entityAttrs);
new TemplateTransformer().transformRow(row, context); new TemplateTransformer().transformRow(row, context);
Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name")); Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname"));
} }
} }