diff --git a/contrib/dataimporthandler/CHANGES.txt b/contrib/dataimporthandler/CHANGES.txt index f672ab38eee..ff9e65319e7 100644 --- a/contrib/dataimporthandler/CHANGES.txt +++ b/contrib/dataimporthandler/CHANGES.txt @@ -82,6 +82,10 @@ New Features 18.SOLR-783: Enhance delta-imports by maintaining separate last_index_time for each entity. (Jon Baer, Noble Paul via shalin) +19.SOLR-1033: Current entity's namespace is made available to all Transformers. This allows one to use an output field + of TemplateTransformer in other transformers, among other things. + (Fergus McMenemie, Noble Paul via shalin) + Optimizations ---------------------- 1. SOLR-846: Reduce memory consumption during delta import by removing keys when used diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java index 278584b752c..c0b9c976e39 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java @@ -51,9 +51,6 @@ public class EntityProcessorBase extends EntityProcessor { protected String query; - @SuppressWarnings("unchecked") - private Map session; - protected String onError = ABORT; public void init(Context context) { @@ -67,7 +64,6 @@ public class EntityProcessorBase extends EntityProcessor { } resolver = (VariableResolverImpl) context.getVariableResolver(); query = null; - session = null; isFirstInit = false; } @@ -169,6 +165,7 @@ public class EntityProcessorBase extends EntityProcessor { if (rows != null) { List> tmpRows = new ArrayList>(); for (Map map : rows) { + resolver.addNamespace(entityName, map); Object o = t.transformRow(map, context); if (o == null) continue; @@ -184,6 +181,7 @@ public class EntityProcessorBase extends EntityProcessor { } rows = tmpRows; } else { + resolver.addNamespace(entityName, transformedRow); Object o = t.transformRow(transformedRow, context); if (o == null) return null; @@ -253,19 +251,6 @@ public class EntityProcessorBase extends EntityProcessor { return null; } - public void setSessionAttribute(Object key, Object val) { - if (session == null) { - session = new HashMap(); - } - session.put(key, val); - } - - public Object getSessionAttribute(Object key) { - if (session == null) - return null; - return session.get(key); - } - /** * For a simple implementation, this is the only method that the sub-class should implement. This is intended to * stream rows one-by-one. Return null to signal end of rows @@ -282,14 +267,6 @@ public class EntityProcessorBase extends EntityProcessor { /*no op*/ } - /** - * Clears the internal session maintained by this EntityProcessor - */ - public void clearSession() { - if (session != null) - session.clear(); - } - /** * Only used by cache implementations */ diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java index 5a7d52eb7c6..cc55c8e2da1 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/TemplateTransformer.java @@ -53,26 +53,10 @@ public class TemplateTransformer extends Transformer { @SuppressWarnings("unchecked") public Object transformRow(Map row, Context context) { - String entityName = context.getEntityAttribute(DataImporter.NAME); - VariableResolverImpl resolver = (VariableResolverImpl) context .getVariableResolver(); - Map resolverMap = (Map) resolver - .resolve(entityName); - - // Clone resolver map because the resolver map contains common fields or any - // others - // that the entity processor chooses to keep. - Map resolverMapCopy = new HashMap(); - if (resolverMap != null) { - for (Map.Entry entry : resolverMap.entrySet()) - resolverMapCopy.put(entry.getKey(), entry.getValue()); - } // Add current row to the copy of resolver map - for (Map.Entry entry : row.entrySet()) - resolverMapCopy.put(entry.getKey(), entry.getValue()); - // Add this copy to the namespace of the current entity in the resolver - resolver.addNamespace(entityName, resolverMapCopy); +// for (Map.Entry entry : row.entrySet()) for (Map map : context.getAllEntityFields()) { String expr = map.get(TEMPLATE); @@ -98,8 +82,6 @@ public class TemplateTransformer extends Transformer { row.put(column, resolver.replaceTokens(expr)); } - // Restore the original resolver map - resolver.addNamespace(entityName, resolverMap); return row; } diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java index 67997e097ca..908f83d21ad 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathEntityProcessor.java @@ -182,15 +182,12 @@ public class XPathEntityProcessor extends EntityProcessorBase { initQuery(resolver.replaceTokens(context.getEntityAttribute(URL))); r = getNext(); if (r == null) { - Object hasMore = getSessionAttribute(HAS_MORE); + Object hasMore = context.getSessionAttribute(HAS_MORE, Context.SCOPE_ENTITY); if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) { - String url = (String) getSessionAttribute(NEXT_URL); + String url = (String) context.getSessionAttribute(NEXT_URL, Context.SCOPE_ENTITY); if (url == null) url = context.getEntityAttribute(URL); - Map namespace = (Map) getSessionAttribute(entityName); - if (namespace != null) - resolver.addNamespace(entityName, namespace); - clearSession(); + addNamespace(); initQuery(resolver.replaceTokens(url)); r = getNext(); if (r == null) @@ -199,12 +196,41 @@ public class XPathEntityProcessor extends EntityProcessorBase { return null; } } + addCommonFields(r); r = applyTransformer(r); if (r != null) return readUsefulVars(r); } } + private void addNamespace() { + Map namespace = new HashMap(); + Set allNames = new HashSet(); + if (commonFields != null) allNames.addAll(commonFields); + if (placeHolderVariables != null) allNames.addAll(placeHolderVariables); + if(allNames.isEmpty()) return; + + for (String name : allNames) { + Object val = context.getSessionAttribute(name, Context.SCOPE_ENTITY); + if (val != null) namespace.put(name, val); + } + resolver.addNamespace(entityName, namespace); + + } + + private void addCommonFields(Map r) { + if(commonFields != null){ + for (String commonField : commonFields) { + if(r.get(commonField) == null) { + Object val = context.getSessionAttribute(commonField, Context.SCOPE_ENTITY); + if(val != null) r.put(commonField, val); + } + + } + } + + } + private void initQuery(String s) { Reader data = null; try { @@ -251,8 +277,8 @@ public class XPathEntityProcessor extends EntityProcessorBase { } }); } catch (Exception e) { - String msg = "Parsing failed for xml, url:" + s + "rows processed :" + rows.size(); - if (rows.size() > 0) msg += "last row : " + rows.get(rows.size() - 1); + String msg = "Parsing failed for xml, url:" + s + " rows processed:" + rows.size(); + if (rows.size() > 0) msg += " last row: " + rows.get(rows.size() - 1); if (ABORT.equals(onError)) { wrapAndThrow(SEVERE, e, msg); } else if (SKIP.equals(onError)) { @@ -321,28 +347,21 @@ public class XPathEntityProcessor extends EntityProcessorBase { private Map readUsefulVars(Map r) { Object val = r.get(HAS_MORE); if (val != null) - setSessionAttribute(HAS_MORE, val); + context.setSessionAttribute(HAS_MORE, val,Context.SCOPE_ENTITY); val = r.get(NEXT_URL); if (val != null) - setSessionAttribute(NEXT_URL, val); + context.setSessionAttribute(NEXT_URL, val,Context.SCOPE_ENTITY); if (placeHolderVariables != null) { - Map namespace = getNameSpace(); for (String s : placeHolderVariables) { val = r.get(s); - if (val != null) - namespace.put(s, val); + context.setSessionAttribute(s, val,Context.SCOPE_ENTITY); } } if (commonFields != null) { for (String s : commonFields) { Object commonVal = r.get(s); if (commonVal != null) { - setSessionAttribute(s, commonVal); - getNameSpace().put(s, commonVal); - } else { - commonVal = getSessionAttribute(s); - if (commonVal != null) - r.put(s, commonVal); + context.setSessionAttribute(s, commonVal,Context.SCOPE_ENTITY); } } } @@ -395,8 +414,8 @@ public class XPathEntityProcessor extends EntityProcessorBase { if (row == null || row == Collections.EMPTY_MAP) { isEnd.set(true); if (exp.get() != null) { - String msg = "Parsing failed for xml, url:" + s + "rows processed in this xml:" + count; - if (lastRow != null) msg += "last row in this xml: " + lastRow; + String msg = "Parsing failed for xml, url:" + s + " rows processed in this xml:" + count; + if (lastRow != null) msg += " last row in this xml:" + lastRow; if (ABORT.equals(onError)) { wrapAndThrow(SEVERE, exp.get(), msg); } else if (SKIP.equals(onError)) { @@ -422,15 +441,6 @@ public class XPathEntityProcessor extends EntityProcessorBase { } - @SuppressWarnings("unchecked") - private Map getNameSpace() { - Map namespace = (Map) getSessionAttribute(entityName); - if (namespace == null) { - namespace = new HashMap(); - setSessionAttribute(entityName, namespace); - } - return namespace; - } public static final String URL = "url"; diff --git a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java index 20b375a6658..76ed8056abe 100644 --- a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java +++ b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestRegexTransformer.java @@ -25,9 +25,7 @@ import java.util.List; import java.util.Map; /** - *

- * Test for RegexTransformer - *

+ *

Test for RegexTransformer

* * @version $Id$ * @since solr 1.3 @@ -37,14 +35,14 @@ public class TestRegexTransformer { @Test public void commaSeparated() { List> fields = new ArrayList>(); + // fields.add(getField("col1", "string", null, "a", ",")); - Context context = AbstractDataImportHandlerTest.getContext(null, null, - null, 0, fields, null); + Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null); + Map src = new HashMap(); - String s = "a,bb,cc,d"; - src.put("a", s); - Map result = new RegexTransformer().transformRow(src, - context); + src.put("a", "a,bb,cc,d"); + + Map result = new RegexTransformer().transformRow(src, context); Assert.assertEquals(2, result.size()); Assert.assertEquals(4, ((List) result.get("col1")).size()); } @@ -52,14 +50,17 @@ public class TestRegexTransformer { @Test public void replaceWith() { List> fields = new ArrayList>(); + // Map fld = getField("name", "string", "'", null, null); fld.put("replaceWith", "''"); fields.add(fld); Context context = AbstractDataImportHandlerTest.getContext(null, null, null, 0, fields, null); + Map src = new HashMap(); String s = "D'souza"; src.put("name", s); + Map result = new RegexTransformer().transformRow(src, context); Assert.assertEquals("D''souza", result.get("name")); @@ -67,36 +68,57 @@ public class TestRegexTransformer { @Test public void mileage() { - Context context = AbstractDataImportHandlerTest.getContext(null, null, - null, 0, getFields(), null); + List> fields = getFields(); - Map src = new HashMap(); + // add another regex which reuses result from previous regex again! + // + Map fld = getField("hltCityMPG", "string", + ".*(${e.city_mileage})", "rowdata", null); + fld.put("replaceWith", "*** $1 ***"); + fields.add(fld); + + Map row = new HashMap(); String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City"; - src.put("rowdata", s); - Map result = new RegexTransformer().transformRow(src, - context); - Assert.assertEquals(3, result.size()); + row.put("rowdata", s); + + VariableResolverImpl resolver = new VariableResolverImpl(); + resolver.addNamespace("e", row); + Map eAttrs = AbstractDataImportHandlerTest.createMap("name", "e"); + Context context = AbstractDataImportHandlerTest.getContext(null, resolver, null, 0, fields, eAttrs); + + Map result = new RegexTransformer().transformRow(row, context); + Assert.assertEquals(4, result.size()); Assert.assertEquals(s, result.get("rowdata")); Assert.assertEquals("26", result.get("highway_mileage")); Assert.assertEquals("19", result.get("city_mileage")); - + Assert.assertEquals("*** 19 *** mpg City", result.get("hltCityMPG")); } public static List> getFields() { List> fields = new ArrayList>(); + + // + fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null)); + + // fields.add(getField("rowdata", "string", null, "rowdata", null)); return fields; - } public static Map getField(String col, String type, diff --git a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java index b17fd69d14a..a23ef361270 100644 --- a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java +++ b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestTemplateTransformer.java @@ -43,11 +43,17 @@ public class TestTemplateTransformer { fields.add(AbstractDataImportHandlerTest.createMap("column", "name", TemplateTransformer.TEMPLATE, "${e.lastName}, ${e.firstName} ${e.middleName}")); - - Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin", - "middleName", "Shekhar", "lastName", "Mangar"); + // test reuse of template output in another template + fields.add(AbstractDataImportHandlerTest.createMap("column", "mrname", + TemplateTransformer.TEMPLATE,"Mr ${e.name}")); + + Map row = AbstractDataImportHandlerTest.createMap( + "firstName", "Shalin", + "middleName", "Shekhar", + "lastName", "Mangar"); VariableResolverImpl resolver = new VariableResolverImpl(); + resolver.addNamespace("e", row); Map entityAttrs = AbstractDataImportHandlerTest.createMap( "name", "e"); @@ -55,6 +61,7 @@ public class TestTemplateTransformer { null, 0, fields, entityAttrs); new TemplateTransformer().transformRow(row, context); Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name")); + Assert.assertEquals("Mr Mangar, Shalin Shekhar", row.get("mrname")); } }