SOLR-1033 -- Current entity's namespace is made available to all Transformers. This allows one to use an output field of TemplateTransformer in other transformers, among other things.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@747664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2009-02-25 05:27:31 +00:00
parent 19d7b267a7
commit c8e94bec4b
6 changed files with 99 additions and 97 deletions

View File

@ -82,6 +82,10 @@ New Features
18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity.
(Jon Baer, Noble Paul via shalin)
19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field
of TemplateTransformer in other transformers, among other things.
(Fergus McMenemie, Noble Paul via shalin)
Optimizations
----------------------
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used

View File

@ -51,9 +51,6 @@ public class EntityProcessorBase extends EntityProcessor {
protected String query;
@SuppressWarnings("unchecked")
private Map session;
protected String onError = ABORT;
public void init(Context context) {
@ -67,7 +64,6 @@ public class EntityProcessorBase extends EntityProcessor {
}
resolver = (VariableResolverImpl) context.getVariableResolver();
query = null;
session = null;
isFirstInit = false;
}
@ -169,6 +165,7 @@ public class EntityProcessorBase extends EntityProcessor {
if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
for (Map<String, Object> map : rows) {
resolver.addNamespace(entityName, map);
Object o = t.transformRow(map, context);
if (o == null)
continue;
@ -184,6 +181,7 @@ public class EntityProcessorBase extends EntityProcessor {
}
rows = tmpRows;
} else {
resolver.addNamespace(entityName, transformedRow);
Object o = t.transformRow(transformedRow, context);
if (o == null)
return null;
@ -253,19 +251,6 @@ public class EntityProcessorBase extends EntityProcessor {
return null;
}
public void setSessionAttribute(Object key, Object val) {
if (session == null) {
session = new HashMap();
}
session.put(key, val);
}
public Object getSessionAttribute(Object key) {
if (session == null)
return null;
return session.get(key);
}
/**
* For a simple implementation, this is the only method that the sub-class should implement. This is intended to
* stream rows one-by-one. Return null to signal end of rows
@ -282,14 +267,6 @@ public class EntityProcessorBase extends EntityProcessor {
/*no op*/
}
/**
* Clears the internal session maintained by this EntityProcessor
*/
public void clearSession() {
if (session != null)
session.clear();
}
/**
* Only used by cache implementations
*/

View File

@ -53,26 +53,10 @@ public class TemplateTransformer extends Transformer {
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
String entityName = context.getEntityAttribute(DataImporter.NAME);
VariableResolverImpl resolver = (VariableResolverImpl) context
.getVariableResolver();
Map<String, Object> resolverMap = (Map<String, Object>) resolver
.resolve(entityName);
// Clone resolver map because the resolver map contains common fields or any
// others
// that the entity processor chooses to keep.
Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
if (resolverMap != null) {
for (Map.Entry<String, Object> entry : resolverMap.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
}
// Add current row to the copy of resolver map
for (Map.Entry<String, Object> entry : row.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
// Add this copy to the namespace of the current entity in the resolver
resolver.addNamespace(entityName, resolverMapCopy);
// for (Map.Entry<String, Object> entry : row.entrySet())
for (Map<String, String> map : context.getAllEntityFields()) {
String expr = map.get(TEMPLATE);
@ -98,8 +82,6 @@ public class TemplateTransformer extends Transformer {
row.put(column, resolver.replaceTokens(expr));
}
// Restore the original resolver map
resolver.addNamespace(entityName, resolverMap);
return row;
}

View File

@ -182,15 +182,12 @@ public class XPathEntityProcessor extends EntityProcessorBase {
initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
r = getNext();
if (r == null) {
Object hasMore = getSessionAttribute(HAS_MORE);
Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY);
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
String url = (String) getSessionAttribute(NEXT_URL);
String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY);
if (url == null)
url = context.getEntityAttribute(URL);
Map namespace = (Map) getSessionAttribute(entityName);
if (namespace != null)
resolver.addNamespace(entityName, namespace);
clearSession();
addNamespace();
initQuery(resolver.replaceTokens(url));
r = getNext();
if (r == null)
@ -199,12 +196,41 @@ public class XPathEntityProcessor extends EntityProcessorBase {
return null;
}
}
addCommonFields(r);
r = applyTransformer(r);
if (r != null)
return readUsefulVars(r);
}
}
private void addNamespace() {
Map<String, Object> namespace = new HashMap<String, Object>();
Set<String> allNames = new HashSet<String>();
if (commonFields != null) allNames.addAll(commonFields);
if (placeHolderVariables != null) allNames.addAll(placeHolderVariables);
if(allNames.isEmpty()) return;
for (String name : allNames) {
Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY);
if (val != null) namespace.put(name, val);
}
resolver.addNamespace(entityName, namespace);
}
private void addCommonFields(Map<String, Object> r) {
if(commonFields != null){
for (String commonField : commonFields) {
if(r.get(commonField) == null) {
Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY);
if(val != null) r.put(commonField, val);
}
}
}
}
private void initQuery(String s) {
Reader data = null;
try {
@ -251,8 +277,8 @@ public class XPathEntityProcessor extends EntityProcessorBase {
}
});
} catch (Exception e) {
String msg = "Parsing failed for xml, url:" + s + "rows processed :" + rows.size();
if (rows.size() > 0) msg += "last row : " + rows.get(rows.size() - 1);
String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size();
if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1);
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e, msg);
} else if (SKIP.equals(onError)) {
@ -321,28 +347,21 @@ public class XPathEntityProcessor extends EntityProcessorBase {
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
Object val = r.get(HAS_MORE);
if (val != null)
setSessionAttribute(HAS_MORE, val);
context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY);
val = r.get(NEXT_URL);
if (val != null)
setSessionAttribute(NEXT_URL, val);
context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY);
if (placeHolderVariables != null) {
Map namespace = getNameSpace();
for (String s : placeHolderVariables) {
val = r.get(s);
if (val != null)
namespace.put(s, val);
context.setSessionAttribute(s, val,Context.SCOPE_ENTITY);
}
}
if (commonFields != null) {
for (String s : commonFields) {
Object commonVal = r.get(s);
if (commonVal != null) {
setSessionAttribute(s, commonVal);
getNameSpace().put(s, commonVal);
} else {
commonVal = getSessionAttribute(s);
if (commonVal != null)
r.put(s, commonVal);
context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY);
}
}
}
@ -395,8 +414,8 @@ public class XPathEntityProcessor extends EntityProcessorBase {
if (row == null || row == Collections.EMPTY_MAP) {
isEnd.set(true);
if (exp.get() != null) {
String msg = "Parsing failed for xml, url:" + s + "rows processed in this xml:" + count;
if (lastRow != null) msg += "last row in this xml: " + lastRow;
String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count;
if (lastRow != null) msg += " last row in this xml:" + lastRow;
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, exp.get(), msg);
} else if (SKIP.equals(onError)) {
@ -422,15 +441,6 @@ public class XPathEntityProcessor extends EntityProcessorBase {
}
@SuppressWarnings("unchecked")
private Map getNameSpace() {
Map namespace = (Map) getSessionAttribute(entityName);
if (namespace == null) {
namespace = new HashMap();
setSessionAttribute(entityName, namespace);
}
return namespace;
}
public static final String URL = "url";

View File

@ -25,9 +25,7 @@ import java.util.List;
import java.util.Map;
/**
* <p>
* Test for RegexTransformer
* </p>
* <p> Test for RegexTransformer </p>
*
* @version $Id$
* @since solr 1.3
@ -37,14 +35,14 @@ public class TestRegexTransformer {
@Test
public void commaSeparated() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="col1" sourceColName="a" splitBy="," />
fields.add(getField("col1", "string", null, "a", ","));
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null);
Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>();
String s = "a,bb,cc,d";
src.put("a", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
src.put("a", "a,bb,cc,d");
Map<String, Object> result = new RegexTransformer().transformRow(src, context);
Assert.assertEquals(2, result.size());
Assert.assertEquals(4, ((List) result.get("col1")).size());
}
@ -52,14 +50,17 @@ public class TestRegexTransformer {
@Test
public void replaceWith() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="name" sourceColName="a" regexp="'" replaceWith="''" />
Map<String, String> fld = getField("name", "string", "'", null, null);
fld.put("replaceWith", "''");
fields.add(fld);
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>();
String s = "D'souza";
src.put("name", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals("D''souza", result.get("name"));
@ -67,36 +68,57 @@ public class TestRegexTransformer {
@Test
public void mileage() {
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, getFields(), null);
List<Map<String, String>> fields = getFields();
Map<String, Object> src = new HashMap<String, Object>();
// add another regex which reuses result from previous regex again!
// <field column="hltCityMPG" sourceColName="rowdata" regexp="(${e.city_mileage})" />
Map<String, String> fld = getField("hltCityMPG", "string",
".*(${e.city_mileage})", "rowdata", null);
fld.put("replaceWith", "*** $1 ***");
fields.add(fld);
Map<String, Object> row = new HashMap<String, Object>();
String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
src.put("rowdata", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals(3, result.size());
row.put("rowdata", s);
VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Map<String, String> eAttrs = AbstractDataImportHandlerTest.createMap("name", "e");
Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs);
Map<String, Object> result = new RegexTransformer().transformRow(row, context);
Assert.assertEquals(4, result.size());
Assert.assertEquals(s, result.get("rowdata"));
Assert.assertEquals("26", result.get("highway_mileage"));
Assert.assertEquals("19", result.get("city_mileage"));
Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG"));
}
public static List<Map<String, String>> getFields() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
// <field column="city_mileage" sourceColName="rowdata" regexp=
// "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City"
fields.add(getField("city_mileage", "sint",
"Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
"rowdata", null));
// <field column="highway_mileage" sourceColName="rowdata" regexp=
// "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City"
fields.add(getField("highway_mileage", "sint",
"Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
"rowdata", null));
// <field column="seating_capacity" sourceColName="rowdata" regexp="Seating capacity:(.*)"
fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
"rowdata", null));
fields
.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
// <field column="warranty" sourceColName="rowdata" regexp="Warranty:(.*)" />
fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
// <field column="rowdata" sourceColName="rowdata" />
fields.add(getField("rowdata", "string", null, "rowdata", null));
return fields;
}
public static Map<String, String> getField(String col, String type,

View File

@ -43,11 +43,17 @@ public class TestTemplateTransformer {
fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
TemplateTransformer.TEMPLATE,
"${e.lastName}, ${e.firstName} ${e.middleName}"));
Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
"middleName", "Shekhar", "lastName", "Mangar");
// test reuse of template output in another template
fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname",
TemplateTransformer.TEMPLATE,"Mr ${e.name}"));
Map row = AbstractDataImportHandlerTest.createMap(
"firstName", "Shalin",
"middleName", "Shekhar",
"lastName", "Mangar");
VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"name", "e");
@ -55,6 +61,7 @@ public class TestTemplateTransformer {
null, 0, fields, entityAttrs);
new TemplateTransformer().transformRow(row, context);
Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname"));
}
}